Skip to content

display

laktory.spark.dataframe.display ¤

Functions¤

display ¤

display(df, n=10, truncate=False, vertical=False, refresh_interval=3.0)

Prints the first n rows to the console. Compatible with both static and streaming DataFrames. For a streaming DataFrame, the console is refreshed continuously until a keyboard interrupt (Ctrl+C) is received.

PARAMETER DESCRIPTION
df

Input DataFrame

TYPE: DataFrame

n

Number of rows to display

TYPE: int DEFAULT: 10

truncate

If set to True, strings longer than 20 characters are truncated. If set to a number greater than one, long strings are truncated to that length and cells are right-aligned.

TYPE: Union[bool, int] DEFAULT: False

vertical

If set to True, print output rows vertically (one line per column value).

TYPE: bool DEFAULT: False

refresh_interval

Pause duration, in seconds, between each update for streaming dataframes

TYPE: float DEFAULT: 3.0

RETURNS DESCRIPTION
None

None

Examples:

import laktory  # noqa: F401
import pandas as pd

df = spark.createDataFrame(
    pd.DataFrame(
        {
            "symbol": ["AAPL", "GOOGL"],
            "price": [200.0, 205.0],
            "tstamp": ["2023-09-01", "2023-09-01"],
        }
    )
)
df.laktory.display(n=5, refresh_interval=2)
Source code in laktory/spark/dataframe/display.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def display(
    df: DataFrame,
    n: int = 10,
    truncate: Union[bool, int] = False,
    vertical: bool = False,
    refresh_interval: float = 3.0,
) -> None:
    """
    Prints the first n rows to the console. Compatible with both static and
    streaming dataframes. In the case of a streaming dataframe, the console
    is refreshed continuously until a keyboard interrupt (Ctrl+C) is
    received.

    Parameters
    ----------
    df:
        Input DataFrame
    n:
        Number of rows to display
    truncate:
        If set to `True`, truncate strings longer than 20 chars by default. If
        set to a number greater than one, truncates long strings to length
        truncate and aligns cells right.
    vertical:
        If set to `True`, print output rows vertically (one line per column
        value).
    refresh_interval:
        Pause duration, in seconds, between each update for streaming
        dataframes


    Returns
    -------
    :
        None

    Examples
    --------

    ```py
    import laktory  # noqa: F401
    import pandas as pd

    df = spark.createDataFrame(
        pd.DataFrame(
            {
                "symbol": ["AAPL", "GOOGL"],
                "price": [200.0, 205.0],
                "tstamp": ["2023-09-01", "2023-09-01"],
            }
        )
    )
    df.laktory.display(n=5, refresh_interval=2)
    ```
    """

    # Static dataframe: a single show() call is enough.
    if not df.isStreaming:
        df.show(n=n, truncate=truncate, vertical=vertical)
        return

    # Name shared by the streaming query and the in-memory table it feeds.
    view_name = "_laktory_tmp_view"

    # Start the streaming query, storing the results in an in-memory table
    query = (
        df.writeStream.outputMode("append")
        .format("memory")
        .queryName(view_name)
        .start()
    )

    try:
        while True:
            # Display up to n rows currently available in the in-memory table.
            # `n` must also be forwarded to show(): otherwise its own default
            # row cap applies and requests for more rows are silently cut.
            df.sparkSession.sql(f"SELECT * FROM {view_name} LIMIT {n}").show(
                n=n, truncate=truncate, vertical=vertical
            )
            time.sleep(refresh_interval)
    except KeyboardInterrupt:
        print("Stopped streaming display.")
    finally:
        # Always stop the query, whether the loop ended by Ctrl+C or an error.
        query.stop()