Skip to content

schema_flat

laktory.polars.dataframe.schema_flat ¤

Functions¤

schema_flat ¤

schema_flat(df)

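Returns a flat list of column names in which nested struct fields are expanded using dot notation (e.g. `stock.symbol`) and fields of structs stored inside lists are expanded using `[*]` (e.g. `prices[*].open`).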

PARAMETER DESCRIPTION
df

Input DataFrame

TYPE: DataFrame

RETURNS DESCRIPTION
list[str]

List of column names, each parent column followed by its nested fields

Examples:

import laktory  # noqa: F401
import polars as pl

df = pl.DataFrame(
    {
        "indexx": [1, 2, 3],
        "stock": [
            {"symbol": "AAPL", "name": "Apple"},
            {"symbol": "MSFT", "name": "Microsoft"},
            {"symbol": "GOOGL", "name": "Google"},
        ],
        "prices": [
            [{"open": 1, "close": 2}, {"open": 1, "close": 2}],
            [{"open": 1, "close": 2}, {"open": 1, "close": 2}],
            [{"open": 1, "close": 2}, {"open": 1, "close": 2}],
        ],
    }
)

print(df.laktory.schema_flat())
'''
[
    'indexx',
    'stock',
    'stock.symbol',
    'stock.name',
    'prices',
    'prices[*].open',
    'prices[*].close',
]
'''
Source code in laktory/polars/dataframe/schema_flat.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def schema_flat(df: pl.DataFrame) -> list[str]:
    """
    Returns a flat list of column names, with nested fields expanded.

    Each column is listed under its own name. Fields of a struct column
    are additionally listed as `column.field`, and fields of a struct stored
    inside a list column as `column[*].field`. Nested structs are expanded
    recursively.

    Parameters
    ----------
    df:
        Input DataFrame

    Returns
    -------
    :
        Column names, each parent column followed by its nested fields

    Examples
    --------
    ```py
    import laktory  # noqa: F401
    import polars as pl

    df = pl.DataFrame(
        {
            "indexx": [1, 2, 3],
            "stock": [
                {"symbol": "AAPL", "name": "Apple"},
                {"symbol": "MSFT", "name": "Microsoft"},
                {"symbol": "GOOGL", "name": "Google"},
            ],
            "prices": [
                [{"open": 1, "close": 2}, {"open": 1, "close": 2}],
                [{"open": 1, "close": 2}, {"open": 1, "close": 2}],
                [{"open": 1, "close": 2}, {"open": 1, "close": 2}],
            ],
        }
    )

    print(df.laktory.schema_flat())
    '''
    [
        'indexx',
        'stock',
        'stock.symbol',
        'stock.name',
        'prices',
        'prices[*].open',
        'prices[*].close',
    ]
    '''
    ```
    """

    def _flatten(columns):
        # `columns` maps a column (or struct field) name to its data type.
        names = []
        for col_name, dtype in columns.items():
            # The column itself is always reported, whatever its type.
            names.append(col_name)

            if isinstance(dtype, pl.Struct):
                # Struct: recurse into its fields and prefix with the parent.
                nested = _flatten(dict(dtype))
                names.extend([f"{col_name}.{sub}" for sub in nested])
            elif isinstance(dtype, pl.List) and isinstance(dtype.inner, pl.Struct):
                # List of structs: `[*]` stands for "any element of the list".
                nested = _flatten(dict(dtype.inner))
                names.extend([f"{col_name}[*].{sub}" for sub in nested])

        return names

    return _flatten(df.schema)