Basic operators
Setup
import numpy as np
import pandas as pd
import polars as pl
# Seed NumPy's global RNG so the values drawn for the "random" column are
# the same on every run.
np.random.seed(42)

# Sample data shared by the examples below: an integer column, two string
# columns, and a float column of five uniform draws.
data = {
    "nrs": [1, 2, 3, 4, 5],
    "names": ["foo", "ham", "spam", "egg", "baz"],
    "random": np.random.rand(5),
    "groups": ["A", "A", "B", "C", "B"],
}

# Build one frame per library from the same dict; the later examples refer
# to these as `df_pl` (Polars) and `df_pd` (pandas).
df_pl = pl.DataFrame(data)
df_pd = pd.DataFrame(data)
print(df_pl)
shape: (5, 4)
┌─────┬───────┬──────────┬────────┐
│ nrs ┆ names ┆ random ┆ groups │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ f64 ┆ str │
╞═════╪═══════╪══════════╪════════╡
│ 1 ┆ foo ┆ 0.37454 ┆ A │
│ 2 ┆ ham ┆ 0.950714 ┆ A │
│ 3 ┆ spam ┆ 0.731994 ┆ B │
│ 4 ┆ egg ┆ 0.598658 ┆ C │
│ 5 ┆ baz ┆ 0.156019 ┆ B │
└─────┴───────┴──────────┴────────┘
Numerical
# Name the two column expressions once so each arithmetic line reads like
# the alias it produces.
nrs_col = pl.col("nrs")
random_col = pl.col("random")

# Each operator builds a new expression; `alias` sets the output column name.
out_pl = df_pl.select(
    (nrs_col + 5).alias("nrs + 5"),
    (nrs_col - 5).alias("nrs - 5"),
    (nrs_col * random_col).alias("nrs * random"),
    (nrs_col / random_col).alias("nrs / random"),
)
print(out_pl)
shape: (5, 4)
┌─────────┬─────────┬──────────────┬──────────────┐
│ nrs + 5 ┆ nrs - 5 ┆ nrs * random ┆ nrs / random │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 ┆ f64 │
╞═════════╪═════════╪══════════════╪══════════════╡
│ 6 ┆ -4 ┆ 0.37454 ┆ 2.669941 │
│ 7 ┆ -3 ┆ 1.901429 ┆ 2.103681 │
│ 8 ┆ -2 ┆ 2.195982 ┆ 4.098395 │
│ 9 ┆ -1 ┆ 2.394634 ┆ 6.681606 │
│ 10 ┆ 0 ┆ 0.780093 ┆ 32.047453 │
└─────────┴─────────┴──────────────┴──────────────┘
Logical
# The two predicates that are reused by the combined and/or columns.
nrs_gt_1 = pl.col("nrs") > 1
random_le_half = pl.col("random") <= 0.5

# Comparison operators yield boolean expressions; `&` and `|` combine them
# element-wise.
out_pl = df_pl.select(
    nrs_gt_1.alias("nrs > 1"),
    random_le_half.alias("random <= .5"),
    (pl.col("nrs") != 1).alias("nrs != 1"),
    (pl.col("nrs") == 1).alias("nrs == 1"),
    (random_le_half & nrs_gt_1).alias("and_expr"),
    (random_le_half | nrs_gt_1).alias("or_expr"),
)
print(out_pl)
shape: (5, 6)
┌─────────┬──────────────┬──────────┬──────────┬──────────┬─────────┐
│ nrs > 1 ┆ random <= .5 ┆ nrs != 1 ┆ nrs == 1 ┆ and_expr ┆ or_expr │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ bool ┆ bool ┆ bool ┆ bool ┆ bool ┆ bool │
╞═════════╪══════════════╪══════════╪══════════╪══════════╪═════════╡
│ false ┆ true ┆ false ┆ true ┆ false ┆ true │
│ true ┆ false ┆ true ┆ false ┆ false ┆ true │
│ true ┆ false ┆ true ┆ false ┆ false ┆ true │
│ true ┆ false ┆ true ┆ false ┆ false ┆ true │
│ true ┆ true ┆ true ┆ false ┆ true ┆ true │
└─────────┴──────────────┴──────────┴──────────┴──────────┴─────────┘
# pandas equivalent of the Polars logical example: compute the two shared
# boolean masks first, then assemble a frame holding only the derived columns.
nrs_gt_1 = df_pd["nrs"] > 1
random_le_half = df_pd["random"] <= 0.5

out_pd = pd.DataFrame(
    {
        "nrs > 1": nrs_gt_1,
        "random <= .5": random_le_half,
        "nrs != 1": df_pd["nrs"] != 1,
        "nrs == 1": df_pd["nrs"] == 1,
        # Parenthesised comparisons combined element-wise with & and |.
        "and_expr": random_le_half & nrs_gt_1,
        "or_expr": random_le_half | nrs_gt_1,
    }
)
print(out_pd)
Reference
The examples in this section have been adapted from the Polars
user guide.