Skip to content

Describe method for expressions

At the moment, Polars has no built-in expression that produces a describe-style summary, for example inside a `group_by` aggregation. A similar result can be obtained by combining the individual aggregation expressions manually, as shown below.

from datetime import date

import polars as pl


def to_describe(col, prefix=""):
    """Build describe-style summary expressions for a single column.

    Args:
        col: Name of the column to summarise.
        prefix: Text prepended to every output column name. When empty
            (the default), ``"<col>_"`` is used instead.

    Returns:
        A list of nine aliased expressions, in this order: count,
        null_count, mean, std, min, 25%, 50%, 75% and max.
    """
    # An empty prefix means "derive one from the column name".
    if not prefix:
        prefix = f"{col}_"

    column = pl.col(col)
    # NOTE(review): whether `count` includes null values appears to depend on
    # the Polars version in use -- confirm against the installed release.
    labelled_stats = (
        ("count", column.count()),
        ("null_count", column.is_null().sum()),
        ("mean", column.mean()),
        ("std", column.std()),
        ("min", column.min()),
        ("25%", column.quantile(0.25)),
        ("50%", column.quantile(0.5)),
        ("75%", column.quantile(0.75)),
        ("max", column.max()),
    )
    return [expr.alias(f"{prefix}{label}") for label, expr in labelled_stats]


# Six observations: the 5th and 25th of September, October and November 2023.
# Columns "a" and "b" each contain one null value.
df = pl.DataFrame(
    {
        "date": [
            date(2023, month, day)
            for month in (9, 10, 11)
            for day in (5, 25)
        ],
        "a": [1, 3, 2, 15, 10, None],
        "b": [None, 11, 13, 12, 115, 110],
    }
)

# Group on the month number of each date and summarise both value columns.
month_key = pl.col("date").dt.month().alias("month")
summary_exprs = to_describe("a") + to_describe("b")
out = df.group_by(month_key).agg(summary_exprs).sort("month")
print(out)
Output:

shape: (3, 19)
┌───────┬─────────┬──────────────┬────────┬───┬───────┬───────┬───────┬───────┐
│ month ┆ a_count ┆ a_null_count ┆ a_mean ┆ … ┆ b_25% ┆ b_50% ┆ b_75% ┆ b_max │
│ ---   ┆ ---     ┆ ---          ┆ ---    ┆   ┆ ---   ┆ ---   ┆ ---   ┆ ---   │
│ u32   ┆ u32     ┆ u32          ┆ f64    ┆   ┆ f64   ┆ f64   ┆ f64   ┆ i64   │
╞═══════╪═════════╪══════════════╪════════╪═══╪═══════╪═══════╪═══════╪═══════╡
│ 9     ┆ 2       ┆ 0            ┆ 2.0    ┆ … ┆ 11.0  ┆ 11.0  ┆ 11.0  ┆ 11    │
│ 10    ┆ 2       ┆ 0            ┆ 8.5    ┆ … ┆ 12.0  ┆ 13.0  ┆ 13.0  ┆ 13    │
│ 11    ┆ 2       ┆ 1            ┆ 10.0   ┆ … ┆ 110.0 ┆ 115.0 ┆ 115.0 ┆ 115   │
└───────┴─────────┴──────────────┴────────┴───┴───────┴───────┴───────┴───────┘