Casting
`pl.Expr.cast` is the primary function for type conversion in Polars. It takes a keyword argument, `strict`, which defaults to `True` and raises an exception if a conversion error occurs. Alternatively, you can set `strict=False`, in which case any value that fails to convert is set to `null`.
Setup
import pandas as pd
import polars as pl
data = {
"integers": [1, 2, 3, 4, 5],
"big_integers": [1, 10000002, 3, 10000004, 10000005],
"floats": [4.0, 5.0, 6.0, 7.0, 8.0],
"floats_with_decimal": [4.532, 5.5, 6.5, 7.5, 8.5],
"floats_as_string": ["4.0", "5.0", "6.0", "7.0", "8.0"],
"strings_not_float": ["4.0", "not_a_number", "6.0", "7.0", "8.0"],
"bools": [True, False, True, False, True],
}
shape: (5, 7)
┌──────────┬──────────────┬────────┬─────────────────────┬──────────────────┬───────────────────┬───────┐
│ integers ┆ big_integers ┆ floats ┆ floats_with_decimal ┆ floats_as_string ┆ strings_not_float ┆ bools │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ str ┆ str ┆ bool │
╞══════════╪══════════════╪════════╪═════════════════════╪══════════════════╪═══════════════════╪═══════╡
│ 1 ┆ 1 ┆ 4.0 ┆ 4.532 ┆ 4.0 ┆ 4.0 ┆ true │
│ 2 ┆ 10000002 ┆ 5.0 ┆ 5.5 ┆ 5.0 ┆ not_a_number ┆ false │
│ 3 ┆ 3 ┆ 6.0 ┆ 6.5 ┆ 6.0 ┆ 6.0 ┆ true │
│ 4 ┆ 10000004 ┆ 7.0 ┆ 7.5 ┆ 7.0 ┆ 7.0 ┆ false │
│ 5 ┆ 10000005 ┆ 8.0 ┆ 8.5 ┆ 8.0 ┆ 8.0 ┆ true │
└──────────┴──────────────┴────────┴─────────────────────┴──────────────────┴───────────────────┴───────┘
integers int64
big_integers int64
floats float64
floats_with_decimal float64
floats_as_string object
strings_not_float object
bools bool
dtype: object
integers big_integers floats floats_with_decimal floats_as_string strings_not_float bools
0 1 1 4.0 4.532 4.0 4.0 True
1 2 10000002 5.0 5.500 5.0 not_a_number False
2 3 3 6.0 6.500 6.0 6.0 True
3 4 10000004 7.0 7.500 7.0 7.0 False
4 5 10000005 8.0 8.500 8.0 8.0 True
Numerics
out_pl = df_pl.select(
pl.col("integers").cast(pl.Float32).alias("integers_as_floats"),
pl.col("floats").cast(pl.Int32).alias("floats_as_integers"),
pl.col("floats_with_decimal")
.cast(pl.Int32)
.alias("floats_with_decimal_as_integers"),
)
print(out_pl)
shape: (5, 3)
┌────────────────────┬────────────────────┬─────────────────────────────────┐
│ integers_as_floats ┆ floats_as_integers ┆ floats_with_decimal_as_integers │
│ --- ┆ --- ┆ --- │
│ f32 ┆ i32 ┆ i32 │
╞════════════════════╪════════════════════╪═════════════════════════════════╡
│ 1.0 ┆ 4 ┆ 4 │
│ 2.0 ┆ 5 ┆ 5 │
│ 3.0 ┆ 6 ┆ 6 │
│ 4.0 ┆ 7 ┆ 7 │
│ 5.0 ┆ 8 ┆ 8 │
└────────────────────┴────────────────────┴─────────────────────────────────┘
out_pd = df_pd.assign(
integers_as_floats=lambda df_: df_.integers.astype("float32"),
floats_as_integers=lambda df_: df_.floats.astype("int32"),
floats_with_decimal_as_integers=lambda df_: df_.floats_with_decimal.astype("int32"),
).drop(columns=df_pd.columns)
print(out_pd.dtypes, end="\n" * 2)
print(out_pd)
Downcast
out_pl = df_pl.select(
pl.col("integers").cast(pl.Int16).alias("integers_smallfootprint"),
pl.col("floats").cast(pl.Float32).alias("floats_smallfootprint"),
)
print(out_pl)
shape: (5, 2)
┌─────────────────────────┬───────────────────────┐
│ integers_smallfootprint ┆ floats_smallfootprint │
│ --- ┆ --- │
│ i16 ┆ f32 │
╞═════════════════════════╪═══════════════════════╡
│ 1 ┆ 4.0 │
│ 2 ┆ 5.0 │
│ 3 ┆ 6.0 │
│ 4 ┆ 7.0 │
│ 5 ┆ 8.0 │
└─────────────────────────┴───────────────────────┘
Overflow
strict=True
try:
out_pl = df_pl.select(pl.col("big_integers").cast(pl.Int8))
print(out_pl)
except Exception as e:
print(e)
strict conversion from `i64` to `i8` failed for column: big_integers, value(s) [10000002, 10000004, 10000005]; if you were trying to cast Utf8 to temporal dtypes, consider using `strptime`
strict=False
pd.Series.astype(..)
This behavior might not be what you expect: pandas does not raise on overflow here, so values that do not fit in `int8` silently wrap around.
out_pd = df_pd.assign(big_integers=lambda df_: df_.big_integers.astype("int8")).drop(
columns=df_pd.columns.drop(["big_integers"])
)
print(out_pd.dtypes, end="\n" * 2)
print(out_pd)
pd.to_numeric(.., downcast=..)
Alternatively, `pd.to_numeric` will do its best to downcast the resulting data to the smallest numerical dtype allowed by the `downcast` parameter.
out_pd = df_pd.assign(
big_integers=lambda df_: pd.to_numeric(df_.big_integers, downcast="integer")
).drop(columns=df_pd.columns.drop(["big_integers"]))
print(out_pd.dtypes, end="\n" * 2)
print(out_pd)
`big_integers` is converted from `int64` to `int32`, the smallest integer dtype that can hold all of its values.
Strings
Numeric values
out_pl = df_pl.select(
pl.col("integers").cast(pl.Utf8),
pl.col("floats").cast(pl.Utf8),
pl.col("floats_as_string").cast(pl.Float64),
)
print(out_pl)
shape: (5, 3)
┌──────────┬────────┬──────────────────┐
│ integers ┆ floats ┆ floats_as_string │
│ --- ┆ --- ┆ --- │
│ str ┆ str ┆ f64 │
╞══════════╪════════╪══════════════════╡
│ 1 ┆ 4.0 ┆ 4.0 │
│ 2 ┆ 5.0 ┆ 5.0 │
│ 3 ┆ 6.0 ┆ 6.0 │
│ 4 ┆ 7.0 ┆ 7.0 │
│ 5 ┆ 8.0 ┆ 8.0 │
└──────────┴────────┴──────────────────┘
out_pd = df_pd.assign(
integers=lambda df_: df_.integers.astype(str),
floats=lambda df_: df_.floats.astype(str),
floats_as_string=lambda df_: df_.floats_as_string.astype("float64"),
).drop(columns=df_pd.columns.drop(["integers", "floats", "floats_as_string"]))
print(out_pd.dtypes, end="\n" * 2)
print(out_pd)
Non-numeric values
- `pl.col().cast(.., strict=True)` in Polars behaves similarly to `pd.to_numeric(.., errors="raise")` in Pandas in this example.
- `pl.col().cast(.., strict=False)` in Polars behaves similarly to `pd.to_numeric(.., errors="coerce")` in Pandas in this example.
strict=True
try:
out_pl = df_pl.select(pl.col("strings_not_float").cast(pl.Float64))
print(out_pl)
except Exception as e:
print(e)
strict conversion from `str` to `f64` failed for column: strings_not_float, value(s) ["not_a_number"]; if you were trying to cast Utf8 to temporal dtypes, consider using `strptime`
strict=False
errors="raise"
try:
out_pd = df_pd.assign(
strings_not_float=lambda df_: pd.to_numeric(df_.strings_not_float)
).drop(columns=df_pd.columns.drop(["strings_not_float"]))
print(out_pd)
except Exception as e:
print(e)
errors="coerce"
Booleans
Reference
The examples in this section have been adapted from the Polars user guide.