Skip to content

groupby_and_agg

laktory.polars.dataframe.groupby_and_agg ¤

FUNCTION DESCRIPTION
groupby_and_agg

Apply a groupby and create aggregation columns.

Functions¤

groupby_and_agg ¤

groupby_and_agg(df, groupby_columns=None, agg_expressions=None)

Apply a groupby and create aggregation columns.

PARAMETER DESCRIPTION
df

DataFrame

groupby_columns

List of column names to group by

TYPE: list[str] DEFAULT: None

agg_expressions

List of aggregation column definitions, each given either as a `ChainNodeColumn` or as a dict of its fields (e.g. `name` and `expr`)

TYPE: list[Any] DEFAULT: None

Examples:

import laktory  # noqa: F401
import polars as pl

df0 = pl.DataFrame(
    {
        "symbol": ["AAPL", "AAPL"],
        "price": [200.0, 205.0],
        "tstamp": ["2023-09-01", "2023-09-02"],
    }
)

df = df0.laktory.groupby_and_agg(
    groupby_columns=["symbol"],
    agg_expressions=[
        {
            "name": "mean_price",
            "expr": "pl.col('price').mean()",
        },
    ],
)

print(df.glimpse(return_as_string=True))
'''
Rows: 1
Columns: 2
$ symbol     <str> 'AAPL'
$ mean_price <f64> 202.5
'''
Source code in laktory/polars/dataframe/groupby_and_agg.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def groupby_and_agg(
    df,
    groupby_columns: list[str] = None,
    agg_expressions: list[Any] = None,
) -> pl.DataFrame:
    """
    Group a DataFrame and compute one aggregated column per expression.

    Parameters
    ----------
    df:
        DataFrame to group. It must expose `group_by(...).agg(...)`.
    groupby_columns:
        Names of the columns to group by. `None` is treated as an empty
        list.
    agg_expressions:
        Aggregation definitions. Each item is either a `ChainNodeColumn`
        or a dict that is expanded as keyword arguments to build one
        (e.g. `name` and `expr`, as in the example below).

    Returns
    -------
    :
        Result of `df.group_by(...).agg(...)`, with each aggregation
        aliased to the `name` of its definition.

    Raises
    ------
    ValueError
        If `agg_expressions` is `None`.

    Notes
    -----
    `ChainNodeColumn` instances passed by the caller are modified in place:
    their `type` attribute is set to `None` before evaluation.

    Examples
    --------
    ```py
    import laktory  # noqa: F401
    import polars as pl

    df0 = pl.DataFrame(
        {
            "symbol": ["AAPL", "AAPL"],
            "price": [200.0, 205.0],
            "tstamp": ["2023-09-01", "2023-09-02"],
        }
    )

    df = df0.laktory.groupby_and_agg(
        groupby_columns=["symbol"],
        agg_expressions=[
            {
                "name": "mean_price",
                "expr": "pl.col('price').mean()",
            },
        ],
    )

    print(df.glimpse(return_as_string=True))
    '''
    Rows: 1
    Columns: 2
    $ symbol     <str> 'AAPL'
    $ mean_price <f64> 202.5
    '''
    ```
    """
    from laktory.models.transformers.basechainnode import ChainNodeColumn

    # Validate / normalize the arguments
    if agg_expressions is None:
        raise ValueError("`agg_expressions` must be specified")
    if groupby_columns is None:
        groupby_columns = []

    logger.info(f"Executing groupby ({groupby_columns}) with {agg_expressions}")

    # Grouping keys: work on a fresh list rather than the caller's object
    keys = list(groupby_columns)

    # Aggregations: one aliased expression per definition
    aggregations = []
    for spec in agg_expressions:
        column = spec if isinstance(spec, ChainNodeColumn) else ChainNodeColumn(**spec)

        # NOTE(review): presumably resets the declared type so evaluation does
        # not apply a cast to the aggregated value — confirm against
        # ChainNodeColumn.eval.
        column.type = None
        aggregations.append(
            column.eval(dataframe_backend="POLARS").alias(column.name)
        )

    return df.group_by(keys).agg(*aggregations)