From 639ab0a201776ba9d59fe2304393be321acf9c2b Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Tue, 14 Oct 2025 17:24:47 +0200 Subject: [PATCH 01/24] Implement Expr.single() --- .../frame/group_by/aggregations/dispatch.rs | 29 ++++ crates/polars-core/src/frame/group_by/mod.rs | 3 + .../src/expressions/aggregation.rs | 29 ++++ crates/polars-expr/src/planner.rs | 1 + crates/polars-expr/src/reduce/convert.rs | 5 +- .../{first_last.rs => first_last_single.rs} | 144 ++++++++++++++++-- crates/polars-expr/src/reduce/mod.rs | 2 +- crates/polars-plan/src/dsl/expr/mod.rs | 2 + crates/polars-plan/src/dsl/format.rs | 1 + crates/polars-plan/src/dsl/mod.rs | 5 + .../polars-plan/src/plans/aexpr/equality.rs | 1 + crates/polars-plan/src/plans/aexpr/mod.rs | 2 + crates/polars-plan/src/plans/aexpr/schema.rs | 4 +- .../polars-plan/src/plans/aexpr/traverse.rs | 33 ++-- .../conversion/dsl_to_ir/expr_expansion.rs | 8 + .../plans/conversion/dsl_to_ir/expr_to_ir.rs | 4 + .../src/plans/conversion/ir_to_dsl.rs | 4 + crates/polars-plan/src/plans/ir/format.rs | 1 + crates/polars-plan/src/plans/iterator.rs | 1 + .../optimizer/set_order/expr_pushdown.rs | 5 +- crates/polars-plan/src/plans/visitor/expr.rs | 1 + crates/polars-python/src/expr/general.rs | 3 + .../src/lazyframe/visitor/expr_nodes.rs | 5 + .../src/physical_plan/lower_expr.rs | 1 + .../src/physical_plan/lower_group_by.rs | 1 + py-polars/src/polars/_plr.pyi | 1 + py-polars/src/polars/expr/expr.py | 21 +++ 27 files changed, 286 insertions(+), 31 deletions(-) rename crates/polars-expr/src/reduce/{first_last.rs => first_last_single.rs} (72%) diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index 6fc075b0b146..58dc04ce6d73 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -310,4 +310,33 @@ impl Series { }; s.restore_logical(out) } + + #[doc(hidden)] 
+ pub unsafe fn agg_single(&self, groups: &GroupsType) -> Series { + // Prevent a rechunk for every individual group. + let s = if groups.len() > 1 { + self.rechunk() + } else { + self.clone() + }; + + let out = match groups { + GroupsType::Idx(groups) => { + let indices = groups + .all() + .iter() + .map(|idx| if idx.len() == 1 { Some(idx[0]) } else { None }) + .collect_ca(PlSmallStr::EMPTY); + s.take_unchecked(&indices) + }, + GroupsType::Slice { groups, .. } => { + let indices = groups + .iter() + .map(|&[first, len]| if len == 1 { Some(first) } else { None }) + .collect_ca(PlSmallStr::EMPTY); + s.take_unchecked(&indices) + }, + }; + s.restore_logical(out) + } } diff --git a/crates/polars-core/src/frame/group_by/mod.rs b/crates/polars-core/src/frame/group_by/mod.rs index c6841ea0bdbc..45fec60deaab 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -875,6 +875,7 @@ pub enum GroupByMethod { Mean, First, Last, + Single, Sum, Groups, NUnique, @@ -897,6 +898,7 @@ impl Display for GroupByMethod { Mean => "mean", First => "first", Last => "last", + Single => "single", Sum => "sum", Groups => "groups", NUnique => "n_unique", @@ -922,6 +924,7 @@ pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { Mean => format_pl_smallstr!("{name}_mean"), First => format_pl_smallstr!("{name}_first"), Last => format_pl_smallstr!("{name}_last"), + Single => format_pl_smallstr!("{name}_single"), Sum => format_pl_smallstr!("{name}_sum"), Groups => PlSmallStr::from_static("groups"), NUnique => format_pl_smallstr!("{name}_n_unique"), diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index e7ab7a16f698..009e2623bb4e 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -130,6 +130,14 @@ impl PhysicalExpr for AggregationExpr { } else { s.tail(Some(1)) }), + GroupByMethod::Single 
=> Ok(match s.len() { + 1 => s.head(Some(1)), + n => { + return Err(polars_err!(ComputeError: + "aggregation 'single' expected a single value, got {n} values" + )); + }, + }), GroupByMethod::Sum => parallel_op_columns( |s| s.sum_reduce().map(|sc| sc.into_column(s.name().clone())), s, @@ -337,6 +345,27 @@ impl PhysicalExpr for AggregationExpr { let agg_s = s.agg_last(&groups); AggregatedScalar(agg_s.with_name(keep_name)) }, + GroupByMethod::Single => { + let (s, groups) = ac.get_final_aggregation(); + for gc in groups.group_count().iter() { + if let Some(n) = gc + && n == 0 + { + return Err(polars_err!(ComputeError: + "aggregation 'single' expected a single value, got an empty group" + )); + } + if let Some(n) = gc + && n > 1 + { + return Err(polars_err!(ComputeError: + "aggregation 'single' expected a single value, got a group with {n} values" + )); + } + } + let agg_s = s.agg_first(&groups); + AggregatedScalar(agg_s.with_name(keep_name)) + }, GroupByMethod::NUnique => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_n_unique(&groups); diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index dee0463db1a0..b604d6590c71 100644 --- a/crates/polars-expr/src/planner.rs +++ b/crates/polars-expr/src/planner.rs @@ -378,6 +378,7 @@ fn create_physical_expr_inner( I::NUnique(_) => GBM::NUnique, I::First(_) => GBM::First, I::Last(_) => GBM::Last, + I::Single(_) => GBM::Single, I::Mean(_) => GBM::Mean, I::Implode(_) => GBM::Implode, I::Quantile { .. 
} => unreachable!(), diff --git a/crates/polars-expr/src/reduce/convert.rs b/crates/polars-expr/src/reduce/convert.rs index 286aa924f78f..e717fbfa6688 100644 --- a/crates/polars-expr/src/reduce/convert.rs +++ b/crates/polars-expr/src/reduce/convert.rs @@ -11,7 +11,9 @@ use crate::reduce::bitwise::{ new_bitwise_and_reduction, new_bitwise_or_reduction, new_bitwise_xor_reduction, }; use crate::reduce::count::{CountReduce, NullCountReduce}; -use crate::reduce::first_last::{new_first_reduction, new_last_reduction}; +use crate::reduce::first_last_single::{ + new_first_reduction, new_last_reduction, new_single_reduction, +}; use crate::reduce::len::LenReduce; use crate::reduce::mean::new_mean_reduction; use crate::reduce::min_max::{new_max_reduction, new_min_reduction}; @@ -51,6 +53,7 @@ pub fn into_reduction( }, IRAggExpr::First(input) => (new_first_reduction(get_dt(*input)?), *input), IRAggExpr::Last(input) => (new_last_reduction(get_dt(*input)?), *input), + IRAggExpr::Single(input) => (new_single_reduction(get_dt(*input)?), *input), IRAggExpr::Count { input, include_nulls, diff --git a/crates/polars-expr/src/reduce/first_last.rs b/crates/polars-expr/src/reduce/first_last_single.rs similarity index 72% rename from crates/polars-expr/src/reduce/first_last.rs rename to crates/polars-expr/src/reduce/first_last_single.rs index 83db6a724fc9..ada2b3c46893 100644 --- a/crates/polars-expr/src/reduce/first_last.rs +++ b/crates/polars-expr/src/reduce/first_last_single.rs @@ -1,4 +1,5 @@ #![allow(unsafe_op_in_unsafe_fn)] +use std::fmt::Debug; use std::marker::PhantomData; use polars_core::frame::row::AnyValueBufferTrusted; @@ -14,6 +15,10 @@ pub fn new_last_reduction(dtype: DataType) -> Box { new_reduction_with_policy::(dtype) } +pub fn new_single_reduction(dtype: DataType) -> Box { + new_reduction_with_policy::(dtype) +} + fn new_reduction_with_policy(dtype: DataType) -> Box { use DataType::*; use VecGroupedReduction as VGR; @@ -42,6 +47,9 @@ fn new_reduction_with_policy(dtype: 
DataType) -> Box usize; fn should_replace(new: u64, old: u64) -> bool; + fn is_single() -> bool { + false + } } struct First; @@ -68,9 +76,8 @@ impl Policy for Last { } } -#[allow(dead_code)] -struct Arbitrary; -impl Policy for Arbitrary { +struct Single; +impl Policy for Single { fn index(_len: usize) -> usize { 0 } @@ -78,10 +85,21 @@ impl Policy for Arbitrary { fn should_replace(_new: u64, old: u64) -> bool { old == 0 } + + fn is_single() -> bool { + true + } } struct NumFirstLastReducer(PhantomData<(P, T)>); +#[derive(Clone, Debug)] +struct Value { + value: Option, + seq: u64, + count: u64, +} + impl Clone for NumFirstLastReducer { fn clone(&self) -> Self { Self(PhantomData) @@ -94,10 +112,14 @@ where T: PolarsNumericType, { type Dtype = T; - type Value = (Option, u64); + type Value = Value; fn init(&self) -> Self::Value { - (None, 0) + Value { + value: None, + seq: 0, + count: 0, + } } fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> { @@ -105,22 +127,27 @@ where } fn combine(&self, a: &mut Self::Value, b: &Self::Value) { - if P::should_replace(b.1, a.1) { - *a = *b; + if P::should_replace(b.seq, a.seq) { + *a = b.clone(); } + a.count += b.count; } fn reduce_one(&self, a: &mut Self::Value, b: Option, seq_id: u64) { - if P::should_replace(seq_id, a.1) { - *a = (b, seq_id); + if P::should_replace(seq_id, a.seq) { + a.value = b; + a.seq = seq_id; } + a.count += b.is_some() as u64; } fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray, seq_id: u64) { - if !ca.is_empty() && P::should_replace(seq_id, v.1) { + if !ca.is_empty() && P::should_replace(seq_id, v.seq) { let val = ca.get(P::index(ca.len())); - *v = (val, seq_id); + v.value = val; + v.seq = seq_id; } + v.count += ca.len() as u64; } fn finish( @@ -130,7 +157,22 @@ where dtype: &DataType, ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. 
- let ca: ChunkedArray = v.into_iter().map(|(x, _s)| x).collect_ca(PlSmallStr::EMPTY); + if P::is_single() { + if v.iter().any(|v| v.count == 0) { + return Err(polars_err!(ComputeError: + "aggregation 'single' expected a single value, got an empty group" + )); + } + if let Some(Value { count: n, .. }) = v.iter().find(|v| v.count > 1) { + return Err(polars_err!(ComputeError: + "aggregation 'single' expected a single value, got a group with {n} values" + )); + } + } + let ca: ChunkedArray = v + .into_iter() + .map(|red_val| red_val.value) + .collect_ca(PlSmallStr::EMPTY); let s = ca.into_series(); unsafe { s.from_physical_unchecked(dtype) } } @@ -382,3 +424,81 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< self } } + +// #[derive(Clone)] +// struct SingleReducer { +// inner: R, +// } + +// #[derive(Clone)] +// struct SingleValue { +// value: (Option, u64), +// got_too_many: bool, +// } + +// impl, u64)>, T: Clone + Send + Sync + 'static> Reducer +// for SingleReducer +// { +// type Dtype = R::Dtype; +// type Value = SingleValue; + +// fn init(&self) -> Self::Value { +// SingleValue { +// value: self.inner.init(), +// got_too_many: false, +// } +// } + +// fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> { +// self.inner.cast_series(s) +// } + +// fn combine(&self, a: &mut Self::Value, b: &Self::Value) { +// if a.got_too_many || b.got_too_many || (a.value.0.is_some() && b.value.0.is_some()) { +// a.got_too_many = true; +// } else { +// self.inner.combine(&mut a.value, &b.value) +// } +// } + +// fn reduce_one( +// &self, +// a: &mut Self::Value, +// b: Option<::Physical<'_>>, +// seq_id: u64, +// ) { +// if a.got_too_many || (a.value.0.is_some() && b.is_some()) { +// a.got_too_many = true; +// } else { +// self.inner.reduce_one(&mut a.value, b, seq_id) +// } +// } + +// fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray, seq_id: u64) { +// if v.got_too_many || (v.value.0.is_some() && !ca.is_empty()) { +// v.got_too_many = true; +// } 
else { +// self.inner.reduce_ca(v, ca, seq_id) +// } +// } + +// fn finish( +// &self, +// v: Vec, +// m: Option, +// dtype: &DataType, +// ) -> PolarsResult { +// if v.iter().all(|x| x.value.0.is_none()) { +// return Err(polars_err!( +// ComputeError: "Single reduction got no non-null values in any group [amber]" +// )); +// } +// if v.iter().any(|x| x.got_too_many) { +// return Err(polars_err!( +// ComputeError: "Single reduction got no non-null values in any group [amber]" +// )); +// } +// let v = v.into_iter().map(|x| x.value).collect::>(); +// self.inner.finish(v, m, dtype) +// } +// } diff --git a/crates/polars-expr/src/reduce/mod.rs b/crates/polars-expr/src/reduce/mod.rs index 4d58c62f515d..8820166e3ca5 100644 --- a/crates/polars-expr/src/reduce/mod.rs +++ b/crates/polars-expr/src/reduce/mod.rs @@ -6,7 +6,7 @@ mod approx_n_unique; mod bitwise; mod convert; mod count; -mod first_last; +mod first_last_single; mod len; mod mean; mod min_max; diff --git a/crates/polars-plan/src/dsl/expr/mod.rs b/crates/polars-plan/src/dsl/expr/mod.rs index a504918a5d3e..68550b3f8582 100644 --- a/crates/polars-plan/src/dsl/expr/mod.rs +++ b/crates/polars-plan/src/dsl/expr/mod.rs @@ -37,6 +37,7 @@ pub enum AggExpr { NUnique(Arc), First(Arc), Last(Arc), + Single(Arc), Mean(Arc), Implode(Arc), Count { @@ -64,6 +65,7 @@ impl AsRef for AggExpr { NUnique(e) => e, First(e) => e, Last(e) => e, + Single(e) => e, Mean(e) => e, Implode(e) => e, Count { input, .. 
} => input, diff --git a/crates/polars-plan/src/dsl/format.rs b/crates/polars-plan/src/dsl/format.rs index c5295295ebdc..18c89a3984ec 100644 --- a/crates/polars-plan/src/dsl/format.rs +++ b/crates/polars-plan/src/dsl/format.rs @@ -113,6 +113,7 @@ impl fmt::Debug for Expr { Mean(expr) => write!(f, "{expr:?}.mean()"), First(expr) => write!(f, "{expr:?}.first()"), Last(expr) => write!(f, "{expr:?}.last()"), + Single(expr) => write!(f, "{expr:?}.item()"), Implode(expr) => write!(f, "{expr:?}.list()"), NUnique(expr) => write!(f, "{expr:?}.n_unique()"), Sum(expr) => write!(f, "{expr:?}.sum()"), diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index f8f7f7ce4412..e0278f99f345 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -176,6 +176,11 @@ impl Expr { AggExpr::Last(Arc::new(self)).into() } + /// Get the single value in the group. If there are multiple values, an error is returned. + pub fn single(self) -> Self { + AggExpr::Single(Arc::new(self)).into() + } + /// GroupBy the group to a Series. 
pub fn implode(self) -> Self { AggExpr::Implode(Arc::new(self)).into() diff --git a/crates/polars-plan/src/plans/aexpr/equality.rs b/crates/polars-plan/src/plans/aexpr/equality.rs index 0f0038aa666a..a9b3da8cff7c 100644 --- a/crates/polars-plan/src/plans/aexpr/equality.rs +++ b/crates/polars-plan/src/plans/aexpr/equality.rs @@ -111,6 +111,7 @@ impl IRAggExpr { A::NUnique(_) | A::First(_) | A::Last(_) | + A::Single(_) | A::Mean(_) | A::Implode(_) | A::Sum(_) | diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 9603bf3ff0e5..0c4f7d7d4c7c 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -48,6 +48,7 @@ pub enum IRAggExpr { NUnique(Node), First(Node), Last(Node), + Single(Node), Mean(Node), Implode(Node), Quantile { @@ -146,6 +147,7 @@ impl From for GroupByMethod { NUnique(_) => GroupByMethod::NUnique, First(_) => GroupByMethod::First, Last(_) => GroupByMethod::Last, + Single(_) => GroupByMethod::Single, Mean(_) => GroupByMethod::Mean, Implode(_) => GroupByMethod::Implode, Sum(_) => GroupByMethod::Sum, diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index 80d2c793b595..e92b16e12e5d 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -139,7 +139,8 @@ impl AExpr { Max { input: expr, .. } | Min { input: expr, .. } | First(expr) - | Last(expr) => ctx.arena.get(*expr).to_field_impl(ctx), + | Last(expr) + | Single(expr) => ctx.arena.get(*expr).to_field_impl(ctx), Sum(expr) => { let mut field = ctx.arena.get(*expr).to_field_impl(ctx)?; let dt = match field.dtype() { @@ -319,6 +320,7 @@ impl AExpr { | Agg(Min { input: expr, .. 
}) | Agg(First(expr)) | Agg(Last(expr)) + | Agg(Single(expr)) | Agg(Sum(expr)) | Agg(Median(expr)) | Agg(Mean(expr)) diff --git a/crates/polars-plan/src/plans/aexpr/traverse.rs b/crates/polars-plan/src/plans/aexpr/traverse.rs index ffec8d5c861f..545fe8d0359e 100644 --- a/crates/polars-plan/src/plans/aexpr/traverse.rs +++ b/crates/polars-plan/src/plans/aexpr/traverse.rs @@ -244,22 +244,24 @@ impl AExpr { impl IRAggExpr { pub fn get_input(&self) -> NodeInputs { use IRAggExpr::*; - use NodeInputs::*; + use NodeInputs as NI; + match self { - Min { input, .. } => Single(*input), - Max { input, .. } => Single(*input), - Median(input) => Single(*input), - NUnique(input) => Single(*input), - First(input) => Single(*input), - Last(input) => Single(*input), - Mean(input) => Single(*input), - Implode(input) => Single(*input), - Quantile { expr, quantile, .. } => Many(vec![*expr, *quantile]), - Sum(input) => Single(*input), - Count { input, .. } => Single(*input), - Std(input, _) => Single(*input), - Var(input, _) => Single(*input), - AggGroups(input) => Single(*input), + Min { input, .. } => NI::Single(*input), + Max { input, .. } => NI::Single(*input), + Median(input) => NI::Single(*input), + NUnique(input) => NI::Single(*input), + First(input) => NI::Single(*input), + Last(input) => NI::Single(*input), + Single(input) => NI::Single(*input), + Mean(input) => NI::Single(*input), + Implode(input) => NI::Single(*input), + Quantile { expr, quantile, .. } => NI::Many(vec![*expr, *quantile]), + Sum(input) => NI::Single(*input), + Count { input, .. } => NI::Single(*input), + Std(input, _) => NI::Single(*input), + Var(input, _) => NI::Single(*input), + AggGroups(input) => NI::Single(*input), } } pub fn set_input(&mut self, input: Node) { @@ -271,6 +273,7 @@ impl IRAggExpr { NUnique(input) => input, First(input) => input, Last(input) => input, + Single(input) => input, Mean(input) => input, Implode(input) => input, Quantile { expr, .. 
} => expr, diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs index a5c75cb4911f..feadd2b90a99 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs @@ -467,6 +467,14 @@ fn expand_expression_rec( opt_flags, |e| Expr::Agg(AggExpr::Last(Arc::new(e))), )?, + AggExpr::Single(expr) => expand_single( + expr.as_ref(), + ignored_selector_columns, + schema, + out, + opt_flags, + |e| Expr::Agg(AggExpr::Single(Arc::new(e))), + )?, AggExpr::Mean(expr) => expand_single( expr.as_ref(), ignored_selector_columns, diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs index 4c952b33b1f6..4bae9d92e57c 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs @@ -253,6 +253,10 @@ pub(super) fn to_aexpr_impl( let (input, output_name) = to_aexpr_mat_lit_arc!(input)?; (IRAggExpr::Last(input), output_name) }, + AggExpr::Single(input) => { + let (input, output_name) = to_aexpr_mat_lit_arc!(input)?; + (IRAggExpr::Single(input), output_name) + }, AggExpr::Mean(input) => { let (input, output_name) = to_aexpr_mat_lit_arc!(input)?; (IRAggExpr::Mean(input), output_name) diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index 1c551afd2009..53f9577cdff8 100644 --- a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -122,6 +122,10 @@ pub fn node_to_expr(node: Node, expr_arena: &Arena) -> Expr { let exp = node_to_expr(expr, expr_arena); AggExpr::Last(Arc::new(exp)).into() }, + IRAggExpr::Single(expr) => { + let exp = node_to_expr(expr, expr_arena); + AggExpr::Single(Arc::new(exp)).into() + }, 
IRAggExpr::Implode(expr) => { let exp = node_to_expr(expr, expr_arena); AggExpr::Implode(Arc::new(exp)).into() diff --git a/crates/polars-plan/src/plans/ir/format.rs b/crates/polars-plan/src/plans/ir/format.rs index 257d2c2b6a5d..110e45815d30 100644 --- a/crates/polars-plan/src/plans/ir/format.rs +++ b/crates/polars-plan/src/plans/ir/format.rs @@ -452,6 +452,7 @@ impl Display for ExprIRDisplay<'_> { Mean(expr) => write!(f, "{}.mean()", self.with_root(expr)), First(expr) => write!(f, "{}.first()", self.with_root(expr)), Last(expr) => write!(f, "{}.last()", self.with_root(expr)), + Single(expr) => write!(f, "{}.single()", self.with_root(expr)), Implode(expr) => write!(f, "{}.implode()", self.with_root(expr)), NUnique(expr) => write!(f, "{}.n_unique()", self.with_root(expr)), Sum(expr) => write!(f, "{}.sum()", self.with_root(expr)), diff --git a/crates/polars-plan/src/plans/iterator.rs b/crates/polars-plan/src/plans/iterator.rs index bb8821c35038..f088da077f30 100644 --- a/crates/polars-plan/src/plans/iterator.rs +++ b/crates/polars-plan/src/plans/iterator.rs @@ -48,6 +48,7 @@ macro_rules! push_expr { NUnique(e) => $push($c, e), First(e) => $push($c, e), Last(e) => $push($c, e), + Single(e) => $push($c, e), Implode(e) => $push($c, e), Count { input, .. } => $push($c, input), Quantile { expr, .. } => $push($c, expr), diff --git a/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs b/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs index f9fd00eb7d55..e686f1d48b3e 100644 --- a/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs +++ b/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs @@ -217,7 +217,10 @@ fn get_frame_observing_impl( }, // Input order observing aggregations. 
- IRAggExpr::Implode(node) | IRAggExpr::First(node) | IRAggExpr::Last(node) => { + IRAggExpr::Implode(node) + | IRAggExpr::First(node) + | IRAggExpr::Last(node) + | IRAggExpr::Single(node) => { if rec!(*node).has_frame_ordering() { return Err(FrameOrderObserved); } diff --git a/crates/polars-plan/src/plans/visitor/expr.rs b/crates/polars-plan/src/plans/visitor/expr.rs index ca96d72d56de..6f148799e534 100644 --- a/crates/polars-plan/src/plans/visitor/expr.rs +++ b/crates/polars-plan/src/plans/visitor/expr.rs @@ -60,6 +60,7 @@ impl TreeWalker for Expr { NUnique(x) => NUnique(am(x, f)?), First(x) => First(am(x, f)?), Last(x) => Last(am(x, f)?), + Single(x) => Single(am(x, f)?), Mean(x) => Mean(am(x, f)?), Implode(x) => Implode(am(x, f)?), Count { input, include_nulls } => Count { input: am(input, f)?, include_nulls }, diff --git a/crates/polars-python/src/expr/general.rs b/crates/polars-python/src/expr/general.rs index 0dfc7b18fb4d..188e3a1b62c3 100644 --- a/crates/polars-python/src/expr/general.rs +++ b/crates/polars-python/src/expr/general.rs @@ -152,6 +152,9 @@ impl PyExpr { fn last(&self) -> Self { self.inner.clone().last().into() } + fn single(&self) -> Self { + self.inner.clone().single().into() + } fn implode(&self) -> Self { self.inner.clone().implode().into() } diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index 0c534be2fb31..d7481ecbaa67 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -684,6 +684,11 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult> { arguments: vec![n.0], options: py.None(), }, + IRAggExpr::Single(n) => Agg { + name: "single".into_py_any(py)?, + arguments: vec![n.0], + options: py.None(), + }, IRAggExpr::Mean(n) => Agg { name: "mean".into_py_any(py)?, arguments: vec![n.0], diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs 
b/crates/polars-stream/src/physical_plan/lower_expr.rs index c8699dabbcf9..e1c84a36a65c 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -1661,6 +1661,7 @@ fn lower_exprs_with_ctx( | IRAggExpr::Max { .. } | IRAggExpr::First(_) | IRAggExpr::Last(_) + | IRAggExpr::Single(_) | IRAggExpr::Sum(_) | IRAggExpr::Mean(_) | IRAggExpr::Var { .. } diff --git a/crates/polars-stream/src/physical_plan/lower_group_by.rs b/crates/polars-stream/src/physical_plan/lower_group_by.rs index 732e825aff3c..c0d6cb48c9b9 100644 --- a/crates/polars-stream/src/physical_plan/lower_group_by.rs +++ b/crates/polars-stream/src/physical_plan/lower_group_by.rs @@ -312,6 +312,7 @@ fn try_lower_elementwise_scalar_agg_expr( | IRAggExpr::Max { .. } | IRAggExpr::First(_) | IRAggExpr::Last(_) + | IRAggExpr::Single(_) | IRAggExpr::Mean(_) | IRAggExpr::Sum(_) | IRAggExpr::Var(..) diff --git a/py-polars/src/polars/_plr.pyi b/py-polars/src/polars/_plr.pyi index f11dc7608560..060c5719090a 100644 --- a/py-polars/src/polars/_plr.pyi +++ b/py-polars/src/polars/_plr.pyi @@ -1184,6 +1184,7 @@ class PyExpr: def unique_stable(self) -> PyExpr: ... def first(self) -> PyExpr: ... def last(self) -> PyExpr: ... + def single(self) -> PyExpr: ... def implode(self) -> PyExpr: ... def quantile(self, quantile: PyExpr, interpolation: Any) -> PyExpr: ... def cut( diff --git a/py-polars/src/polars/expr/expr.py b/py-polars/src/polars/expr/expr.py index 784a6f51644d..f8ce1eceead1 100644 --- a/py-polars/src/polars/expr/expr.py +++ b/py-polars/src/polars/expr/expr.py @@ -3443,6 +3443,27 @@ def last(self) -> Expr: """ return wrap_expr(self._pyexpr.last()) + def single(self) -> Expr: + """ + Get the single value. + + This raises an error if there is not exactly one value. 
+ + Examples + -------- + >>> df = pl.DataFrame({"a": [1]}) + >>> df.select(pl.col("a").single()) + shape: (1, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + └─────┘ + """ + return wrap_expr(self._pyexpr.single()) + def over( self, partition_by: IntoExpr | Iterable[IntoExpr] | None = None, From cac7f2c9d12e95830b8e190362a3fa2c00bc8c7c Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 11:13:22 +0200 Subject: [PATCH 02/24] Implement List.single() and add some tests --- crates/polars-expr/src/dispatch/list.rs | 6 ++ .../src/reduce/first_last_single.rs | 78 ------------------- .../src/chunked_array/list/namespace.rs | 12 +++ .../polars-plan/src/dsl/function_expr/list.rs | 2 + crates/polars-plan/src/dsl/list.rs | 5 ++ .../src/plans/aexpr/function_expr/list.rs | 4 + .../plans/conversion/dsl_to_ir/functions.rs | 1 + .../src/plans/conversion/ir_to_dsl.rs | 1 + crates/polars-python/src/expr/list.rs | 4 + py-polars/src/polars/_plr.pyi | 3 + py-polars/src/polars/expr/expr.py | 1 + py-polars/src/polars/expr/list.py | 29 +++++++ py-polars/src/polars/series/list.py | 21 +++++ .../operations/namespaces/list/test_list.py | 6 ++ py-polars/tests/unit/test_cse.py | 2 + py-polars/tests/unit/test_schema.py | 8 +- 16 files changed, 101 insertions(+), 82 deletions(-) diff --git a/crates/polars-expr/src/dispatch/list.rs b/crates/polars-expr/src/dispatch/list.rs index 796645bf25d0..9c397d5495fd 100644 --- a/crates/polars-expr/src/dispatch/list.rs +++ b/crates/polars-expr/src/dispatch/list.rs @@ -34,6 +34,7 @@ pub fn function_expr_to_udf(func: IRListFunction) -> SpecialEq wrap!(slice), Shift => map_as_slice!(shift), Get(null_on_oob) => wrap!(get, null_on_oob), + Single => map!(single), #[cfg(feature = "list_gather")] Gather(null_on_oob) => map_as_slice!(gather, null_on_oob), #[cfg(feature = "list_gather")] @@ -254,6 +255,11 @@ pub(super) fn get(s: &mut [Column], null_on_oob: bool) -> PolarsResult { polars_ops::prelude::lst_get(ca, index, null_on_oob) } 
+pub(super) fn single(s: &Column) -> PolarsResult { + let list = s.list()?; + list.lst_single().map(Column::from) +} + #[cfg(feature = "list_gather")] pub(super) fn gather(args: &[Column], null_on_oob: bool) -> PolarsResult { let ca = &args[0]; diff --git a/crates/polars-expr/src/reduce/first_last_single.rs b/crates/polars-expr/src/reduce/first_last_single.rs index ada2b3c46893..8444a8e41fcf 100644 --- a/crates/polars-expr/src/reduce/first_last_single.rs +++ b/crates/polars-expr/src/reduce/first_last_single.rs @@ -424,81 +424,3 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< self } } - -// #[derive(Clone)] -// struct SingleReducer { -// inner: R, -// } - -// #[derive(Clone)] -// struct SingleValue { -// value: (Option, u64), -// got_too_many: bool, -// } - -// impl, u64)>, T: Clone + Send + Sync + 'static> Reducer -// for SingleReducer -// { -// type Dtype = R::Dtype; -// type Value = SingleValue; - -// fn init(&self) -> Self::Value { -// SingleValue { -// value: self.inner.init(), -// got_too_many: false, -// } -// } - -// fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> { -// self.inner.cast_series(s) -// } - -// fn combine(&self, a: &mut Self::Value, b: &Self::Value) { -// if a.got_too_many || b.got_too_many || (a.value.0.is_some() && b.value.0.is_some()) { -// a.got_too_many = true; -// } else { -// self.inner.combine(&mut a.value, &b.value) -// } -// } - -// fn reduce_one( -// &self, -// a: &mut Self::Value, -// b: Option<::Physical<'_>>, -// seq_id: u64, -// ) { -// if a.got_too_many || (a.value.0.is_some() && b.is_some()) { -// a.got_too_many = true; -// } else { -// self.inner.reduce_one(&mut a.value, b, seq_id) -// } -// } - -// fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray, seq_id: u64) { -// if v.got_too_many || (v.value.0.is_some() && !ca.is_empty()) { -// v.got_too_many = true; -// } else { -// self.inner.reduce_ca(v, ca, seq_id) -// } -// } - -// fn finish( -// &self, -// v: Vec, -// m: Option, -// dtype: 
&DataType, -// ) -> PolarsResult { -// if v.iter().all(|x| x.value.0.is_none()) { -// return Err(polars_err!( -// ComputeError: "Single reduction got no non-null values in any group [amber]" -// )); -// } -// if v.iter().any(|x| x.got_too_many) { -// return Err(polars_err!( -// ComputeError: "Single reduction got no non-null values in any group [amber]" -// )); -// } -// let v = v.into_iter().map(|x| x.value).collect::>(); -// self.inner.finish(v, m, dtype) -// } -// } diff --git a/crates/polars-ops/src/chunked_array/list/namespace.rs b/crates/polars-ops/src/chunked_array/list/namespace.rs index 51a168254ef2..7146164614e0 100644 --- a/crates/polars-ops/src/chunked_array/list/namespace.rs +++ b/crates/polars-ops/src/chunked_array/list/namespace.rs @@ -384,6 +384,18 @@ pub trait ListNameSpaceImpl: AsList { unsafe { s.from_physical_unchecked(ca.inner_dtype()) } } + fn lst_single(&self) -> PolarsResult { + let ca = self.as_list(); + if let Some(Some(n)) = ca + .downcast_iter() + .map(|arr| arr.offsets().lengths().find(|n| *n != 1)) + .next() + { + polars_bail!(ComputeError: "cannot unpack single value from list of length {n}"); + } + self.lst_get(0, false) + } + #[cfg(feature = "list_gather")] fn lst_gather_every(&self, n: &IdxCa, offset: &IdxCa) -> PolarsResult { let list_ca = self.as_list(); diff --git a/crates/polars-plan/src/dsl/function_expr/list.rs b/crates/polars-plan/src/dsl/function_expr/list.rs index 8f7eff83e59d..9c160a66518f 100644 --- a/crates/polars-plan/src/dsl/function_expr/list.rs +++ b/crates/polars-plan/src/dsl/function_expr/list.rs @@ -21,6 +21,7 @@ pub enum ListFunction { Slice, Shift, Get(bool), + Single, #[cfg(feature = "list_gather")] Gather(bool), #[cfg(feature = "list_gather")] @@ -80,6 +81,7 @@ impl Display for ListFunction { Slice => "slice", Shift => "shift", Get(_) => "get", + Single => "single", #[cfg(feature = "list_gather")] Gather(_) => "gather", #[cfg(feature = "list_gather")] diff --git a/crates/polars-plan/src/dsl/list.rs 
b/crates/polars-plan/src/dsl/list.rs index 8f19e3963ca4..02d5c4896f5d 100644 --- a/crates/polars-plan/src/dsl/list.rs +++ b/crates/polars-plan/src/dsl/list.rs @@ -143,6 +143,11 @@ impl ListNameSpace { ) } + pub fn single(self) -> Expr { + self.0 + .map_unary(FunctionExpr::ListExpr(ListFunction::Single)) + } + /// Get items in every sublist by multiple indexes. /// /// # Arguments diff --git a/crates/polars-plan/src/plans/aexpr/function_expr/list.rs b/crates/polars-plan/src/plans/aexpr/function_expr/list.rs index 557602b8a6e7..b73cb68d170d 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/list.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/list.rs @@ -22,6 +22,7 @@ pub enum IRListFunction { Slice, Shift, Get(bool), + Single, #[cfg(feature = "list_gather")] Gather(bool), #[cfg(feature = "list_gather")] @@ -74,6 +75,7 @@ impl IRListFunction { Slice => mapper.ensure_is_list()?.with_same_dtype(), Shift => mapper.ensure_is_list()?.with_same_dtype(), Get(_) => mapper.ensure_is_list()?.map_to_list_and_array_inner_dtype(), + Single => mapper.ensure_is_list()?.map_to_list_and_array_inner_dtype(), #[cfg(feature = "list_gather")] Gather(_) => mapper.ensure_is_list()?.with_same_dtype(), #[cfg(feature = "list_gather")] @@ -186,6 +188,7 @@ impl IRListFunction { | L::Slice | L::Shift | L::Get(_) + | L::Single | L::Length | L::Max | L::Min @@ -240,6 +243,7 @@ impl Display for IRListFunction { Slice => "slice", Shift => "shift", Get(_) => "get", + Single => "single", #[cfg(feature = "list_gather")] Gather(_) => "gather", #[cfg(feature = "list_gather")] diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs index 6637a48a9154..66b8458e487e 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs @@ -167,6 +167,7 @@ pub(super) fn convert_functions( L::Slice => IL::Slice, L::Shift => 
IL::Shift, L::Get(v) => IL::Get(v), + L::Single => IL::Single, #[cfg(feature = "list_gather")] L::Gather(v) => IL::Gather(v), #[cfg(feature = "list_gather")] diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index 53f9577cdff8..ec339c6df239 100644 --- a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -349,6 +349,7 @@ pub fn ir_function_to_dsl(input: Vec, function: IRFunctionExpr) -> Expr { IL::Slice => L::Slice, IL::Shift => L::Shift, IL::Get(v) => L::Get(v), + IL::Single => L::Single, #[cfg(feature = "list_gather")] IL::Gather(v) => L::Gather(v), #[cfg(feature = "list_gather")] diff --git a/crates/polars-python/src/expr/list.rs b/crates/polars-python/src/expr/list.rs index 4ada018a82dd..f2d065ad5523 100644 --- a/crates/polars-python/src/expr/list.rs +++ b/crates/polars-python/src/expr/list.rs @@ -70,6 +70,10 @@ impl PyExpr { .into() } + fn list_single(&self) -> Self { + self.inner.clone().list().single().into() + } + fn list_join(&self, separator: PyExpr, ignore_nulls: bool) -> Self { self.inner .clone() diff --git a/py-polars/src/polars/_plr.pyi b/py-polars/src/polars/_plr.pyi index 060c5719090a..76cbf6cd5237 100644 --- a/py-polars/src/polars/_plr.pyi +++ b/py-polars/src/polars/_plr.pyi @@ -1551,6 +1551,7 @@ class PyExpr: def list_agg(self, expr: PyExpr) -> PyExpr: ... def list_filter(self, predicate: PyExpr) -> PyExpr: ... def list_get(self, index: PyExpr, null_on_oob: bool) -> PyExpr: ... + def list_single(self) -> PyExpr: ... def list_join(self, separator: PyExpr, ignore_nulls: bool) -> PyExpr: ... def list_len(self) -> PyExpr: ... def list_max(self) -> PyExpr: ... @@ -1941,6 +1942,8 @@ class PySelector: @staticmethod def last(strict: bool) -> PySelector: ... @staticmethod + def single() -> PySelector: ... + @staticmethod def matches(pattern: str) -> PySelector: ... @staticmethod def enum_() -> PySelector: ... 
diff --git a/py-polars/src/polars/expr/expr.py b/py-polars/src/polars/expr/expr.py index f8ce1eceead1..544b496041a4 100644 --- a/py-polars/src/polars/expr/expr.py +++ b/py-polars/src/polars/expr/expr.py @@ -3443,6 +3443,7 @@ def last(self) -> Expr: """ return wrap_expr(self._pyexpr.last()) + @unstable() def single(self) -> Expr: """ Get the single value. diff --git a/py-polars/src/polars/expr/list.py b/py-polars/src/polars/expr/list.py index c5f68cfac23a..cefd7110c58b 100644 --- a/py-polars/src/polars/expr/list.py +++ b/py-polars/src/polars/expr/list.py @@ -8,6 +8,7 @@ from polars import exceptions from polars import functions as F from polars._utils.parse import parse_into_expression +from polars._utils.unstable import unstable from polars._utils.various import issue_warning from polars._utils.wrap import wrap_expr @@ -683,6 +684,34 @@ def last(self) -> Expr: """ return self.get(-1, null_on_oob=True) + @unstable() + def single(self) -> Expr: + """ + Get the single value of the sublists. + + This errors if the sublist length is not exactly one. + + Examples + -------- + >>> df = pl.DataFrame({"a": [[3], [1], [2]]}) + >>> df.with_columns(single=pl.col("a").list.single()) + shape: (3, 2) + ┌───────────┬────────┐ + │ a ┆ single │ + │ --- ┆ --- │ + │ list[i64] ┆ i64 │ + ╞═══════════╪════════╡ + │ [3] ┆ 3 │ + │ [1] ┆ 1 │ + │ [2] ┆ 2 │ + └───────────┴────────┘ + >>> df = pl.DataFrame({"a": [[3, 2], [1], [2]]}) + >>> df.select(pl.col("a").list.single()) + Traceback (most recent call last): + ... ComputeError: cannot unpack single value from list of length 2 + """ + return wrap_expr(self._pyexpr.list_single()) + def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr: """ Check if sublists contain the given item. 
diff --git a/py-polars/src/polars/series/list.py b/py-polars/src/polars/series/list.py index ea0755805488..c2f52c881235 100644 --- a/py-polars/src/polars/series/list.py +++ b/py-polars/src/polars/series/list.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Callable from polars import functions as F +from polars._utils.unstable import unstable from polars._utils.wrap import wrap_s from polars.series.utils import expr_dispatch @@ -570,6 +571,26 @@ def last(self) -> Series: ] """ + @unstable() + def single(self) -> Series: + """ + Get the single value of the sublists. + + This errors if the sublist does not contain exactly one element. + + Examples + -------- + >>> s = pl.Series("a", [[1], [4], [6]]) + >>> s.list.single() + shape: (3,) + Series: 'a' [i64] + [ + 1 + 4 + 6 + ] + """ + def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Series: """ Check if sublists contain the given item. diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py index 752edb89a9ef..bb30f45a33ff 100644 --- a/py-polars/tests/unit/operations/namespaces/list/test_list.py +++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py @@ -49,6 +49,12 @@ def test_list_arr_get() -> None: expected_df = pl.Series("a", [None, None, None], dtype=pl.Int64).to_frame() assert_frame_equal(out_df, expected_df) + # Single + a = pl.Series("a", [[1], [4], [6]]) + expected = pl.Series("a", [1, 4, 6]) + out = a.list.single() + assert_series_equal(out, expected) + a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]]) with pytest.raises(ComputeError, match="get index is out of bounds"): diff --git a/py-polars/tests/unit/test_cse.py b/py-polars/tests/unit/test_cse.py index 5a0ac685aa39..797d619ab302 100644 --- a/py-polars/tests/unit/test_cse.py +++ b/py-polars/tests/unit/test_cse.py @@ -346,6 +346,7 @@ def test_cse_mixed_window_functions() -> None: pl.col("b").rank().alias("d_rank"), 
pl.col("b").first().over([pl.col("a")]).alias("b_first"), pl.col("b").last().over([pl.col("a")]).alias("b_last"), + pl.col("b").single().over([pl.col("a")]).alias("b_single"), pl.col("b").shift().alias("b_lag_1"), pl.col("b").shift().alias("b_lead_1"), pl.col("c").cum_sum().alias("c_cumsum"), @@ -363,6 +364,7 @@ def test_cse_mixed_window_functions() -> None: "d_rank": [1.0], "b_first": [1], "b_last": [1], + "b_single": [1], "b_lag_1": [None], "b_lead_1": [None], "c_cumsum": [1], diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py index 8cbdeb945be1..f000eeeed60e 100644 --- a/py-polars/tests/unit/test_schema.py +++ b/py-polars/tests/unit/test_schema.py @@ -358,16 +358,16 @@ def test_lazy_agg_to_scalar_schema_19752(lhs: pl.Expr, expr_op: str) -> None: def test_lazy_agg_schema_after_elementwise_19984() -> None: lf = pl.LazyFrame({"a": 1, "b": 1}) - q = lf.group_by("a").agg(pl.col("b").first().fill_null(0)) + q = lf.group_by("a").agg(pl.col("b").single().fill_null(0)) assert q.collect_schema() == q.collect().collect_schema() - q = lf.group_by("a").agg(pl.col("b").first().fill_null(0).fill_null(0)) + q = lf.group_by("a").agg(pl.col("b").single().fill_null(0).fill_null(0)) assert q.collect_schema() == q.collect().collect_schema() - q = lf.group_by("a").agg(pl.col("b").first() + 1) + q = lf.group_by("a").agg(pl.col("b").single() + 1) assert q.collect_schema() == q.collect().collect_schema() - q = lf.group_by("a").agg(1 + pl.col("b").first()) + q = lf.group_by("a").agg(1 + pl.col("b").single()) assert q.collect_schema() == q.collect().collect_schema() From 755f0c3a8d8b0ea6e08da6a3faeca1642babf4ce Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 14:24:24 +0200 Subject: [PATCH 03/24] Add a testing for Expr.single --- .../src/expressions/aggregation.rs | 7 +- .../src/reduce/first_last_single.rs | 128 ++++++++++++------ .../aggregation/test_aggregations.py | 65 +++++++++ 3 files changed, 159 insertions(+), 41 deletions(-) 
diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 009e2623bb4e..9f6673142281 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -131,7 +131,12 @@ impl PhysicalExpr for AggregationExpr { s.tail(Some(1)) }), GroupByMethod::Single => Ok(match s.len() { - 1 => s.head(Some(1)), + 0 => { + return Err(polars_err!(ComputeError: + "aggregation 'single' expected a single value, got none" + )); + }, + 1 => s.slice(0, 1), n => { return Err(polars_err!(ComputeError: "aggregation 'single' expected a single value, got {n} values" diff --git a/crates/polars-expr/src/reduce/first_last_single.rs b/crates/polars-expr/src/reduce/first_last_single.rs index 8444a8e41fcf..598558f43ba2 100644 --- a/crates/polars-expr/src/reduce/first_last_single.rs +++ b/crates/polars-expr/src/reduce/first_last_single.rs @@ -93,7 +93,7 @@ impl Policy for Single { struct NumFirstLastReducer(PhantomData<(P, T)>); -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] struct Value { value: Option, seq: u64, @@ -115,11 +115,7 @@ where type Value = Value; fn init(&self) -> Self::Value { - Value { - value: None, - seq: 0, - count: 0, - } + Value::default() } fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> { @@ -128,7 +124,8 @@ where fn combine(&self, a: &mut Self::Value, b: &Self::Value) { if P::should_replace(b.seq, a.seq) { - *a = b.clone(); + a.value = b.value; + a.seq = b.seq; } a.count += b.count; } @@ -158,16 +155,7 @@ where ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. if P::is_single() { - if v.iter().any(|v| v.count == 0) { - return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got an empty group" - )); - } - if let Some(Value { count: n, .. 
}) = v.iter().find(|v| v.count > 1) { - return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got a group with {n} values" - )); - } + check_single_value(&v)?; } let ca: ChunkedArray = v .into_iter() @@ -201,10 +189,10 @@ where P: Policy, { type Dtype = BinaryType; - type Value = (Option>, u64); + type Value = Value>; fn init(&self) -> Self::Value { - (None, 0) + Value::default() } fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> { @@ -212,24 +200,27 @@ where } fn combine(&self, a: &mut Self::Value, b: &Self::Value) { - if P::should_replace(b.1, a.1) { - a.0.clone_from(&b.0); - a.1 = b.1; + if P::should_replace(b.seq, a.seq) { + a.value.clone_from(&b.value); + a.seq = b.seq; } + a.count += b.count; } fn reduce_one(&self, a: &mut Self::Value, b: Option<&[u8]>, seq_id: u64) { - if P::should_replace(seq_id, a.1) { - replace_opt_bytes(&mut a.0, b); - a.1 = seq_id; + if P::should_replace(seq_id, a.seq) { + replace_opt_bytes(&mut a.value, b); + a.seq = seq_id; } + a.count += b.is_some() as u64; } fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray, seq_id: u64) { - if !ca.is_empty() && P::should_replace(seq_id, v.1) { - replace_opt_bytes(&mut v.0, ca.get(P::index(ca.len()))); - v.1 = seq_id; + if !ca.is_empty() && P::should_replace(seq_id, v.seq) { + replace_opt_bytes(&mut v.value, ca.get(P::index(ca.len()))); + v.seq = seq_id; } + v.count += ca.len() as u64; } fn finish( @@ -239,7 +230,13 @@ where dtype: &DataType, ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. - let ca: BinaryChunked = v.into_iter().map(|(x, _s)| x).collect_ca(PlSmallStr::EMPTY); + if P::is_single() { + check_single_value(&v)?; + } + let ca: BinaryChunked = v + .into_iter() + .map(|Value { value, .. 
}| value) + .collect_ca(PlSmallStr::EMPTY); ca.into_series().cast(dtype) } } @@ -257,30 +254,34 @@ where P: Policy, { type Dtype = BooleanType; - type Value = (Option, u64); + type Value = Value; fn init(&self) -> Self::Value { - (None, 0) + Value::default() } fn combine(&self, a: &mut Self::Value, b: &Self::Value) { - if P::should_replace(b.1, a.1) { - *a = *b; + if P::should_replace(b.seq, a.seq) { + a.value = b.value; + a.seq = b.seq; } + a.count += b.count; } fn reduce_one(&self, a: &mut Self::Value, b: Option, seq_id: u64) { - if P::should_replace(seq_id, a.1) { - a.0 = b; - a.1 = seq_id; + if P::should_replace(seq_id, a.seq) { + a.value = b; + a.seq = seq_id; } + a.count += b.is_some() as u64; } fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray, seq_id: u64) { - if !ca.is_empty() && P::should_replace(seq_id, v.1) { - v.0 = ca.get(P::index(ca.len())); - v.1 = seq_id; + if !ca.is_empty() && P::should_replace(seq_id, v.seq) { + v.value = ca.get(P::index(ca.len())); + v.seq = seq_id; } + v.count += ca.len() as u64; } fn finish( @@ -290,7 +291,13 @@ where _dtype: &DataType, ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. - let ca: BooleanChunked = v.into_iter().map(|(x, _s)| x).collect_ca(PlSmallStr::EMPTY); + if P::is_single() { + check_single_value(&v)?; + } + let ca: BooleanChunked = v + .into_iter() + .map(|Value { value, .. }| value) + .collect_ca(PlSmallStr::EMPTY); Ok(ca.into_series()) } } @@ -299,8 +306,10 @@ pub struct GenericFirstLastGroupedReduction

{ in_dtype: DataType, values: Vec>, seqs: Vec, + counts: Vec, evicted_values: Vec>, evicted_seqs: Vec, + evicted_counts: Vec, policy: PhantomData P>, } @@ -310,8 +319,10 @@ impl

GenericFirstLastGroupedReduction

{ in_dtype, values: Vec::new(), seqs: Vec::new(), + counts: Vec::new(), evicted_values: Vec::new(), evicted_seqs: Vec::new(), + evicted_counts: Vec::new(), policy: PhantomData, } } @@ -325,11 +336,13 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< fn reserve(&mut self, additional: usize) { self.values.reserve(additional); self.seqs.reserve(additional); + self.counts.reserve(additional); } fn resize(&mut self, num_groups: IdxSize) { self.values.resize(num_groups as usize, AnyValue::Null); self.seqs.resize(num_groups as usize, 0); + self.counts.resize(num_groups as usize, 0); } fn update_group( @@ -345,6 +358,7 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< self.values[group_idx as usize] = values.get(P::index(values.len()))?.into_static(); self.seqs[group_idx as usize] = seq_id; } + self.counts[group_idx as usize] += values.len() as u64; } Ok(()) } @@ -371,6 +385,7 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< *grp_val = values.get_unchecked(*i as usize).into_static(); *grp_seq = seq_id; } + self.counts[g.idx()] += 1; } Ok(()) } @@ -394,6 +409,7 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< other.values.get_unchecked(si).clone(); *self.seqs.get_unchecked_mut(*g as usize) = *other.seqs.get_unchecked(si); } + *self.counts.get_unchecked_mut(*g as usize) += other.counts[si]; } Ok(()) } @@ -403,16 +419,26 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< in_dtype: self.in_dtype.clone(), values: core::mem::take(&mut self.evicted_values), seqs: core::mem::take(&mut self.evicted_seqs), + counts: core::mem::take(&mut self.evicted_counts), evicted_values: Vec::new(), evicted_seqs: Vec::new(), + evicted_counts: Vec::new(), policy: PhantomData, }) } fn finalize(&mut self) -> PolarsResult { self.seqs.clear(); + if P::is_single() { + for count in self.counts.iter() { + if *count != 1 { + return Err(single_count_err(*count)); + } + } + } unsafe { let mut buf = AnyValueBufferTrusted::new(&self.in_dtype, 
self.values.len()); + for v in core::mem::take(&mut self.values) { buf.add_unchecked_owned_physical(&v); } @@ -424,3 +450,25 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< self } } + +fn check_single_value(v: &[Value]) -> PolarsResult<()> { + if let Some(Value { count: n, .. }) = v.iter().find(|v| v.count != 1) { + Err(single_count_err(*n)) + } else { + Ok(()) + } +} + +fn single_count_err(n: u64) -> PolarsError { + if n == 0 { + polars_err!(ComputeError: + "aggregation 'single' expected a single value, got none" + ) + } else if n > 1 { + polars_err!(ComputeError: + "aggregation 'single' expected a single value, got {n} values" + ) + } else { + unreachable!() + } +} diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 533faa0151ca..977fa86dea60 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -5,10 +5,12 @@ import numpy as np import pytest +from hypothesis import given import polars as pl from polars.exceptions import InvalidOperationError from polars.testing import assert_frame_equal +from polars.testing.parametric import dataframes if TYPE_CHECKING: import numpy.typing as npt @@ -939,3 +941,66 @@ def test_invalid_agg_dtypes_should_raise( pl.exceptions.PolarsError, match=rf"`{op}` operation not supported for dtype" ): df.lazy().select(expr).collect(engine="streaming") + + +@given( + df=dataframes( + min_size=1, + max_size=1, + allow_null=False, + excluded_dtypes=[ + pl.Struct, + ], + ) +) +def test_single(df: pl.DataFrame) -> None: + q = df.lazy().select(pl.all(ignore_nulls=False).single()) + assert_frame_equal(q.collect(), df) + assert_frame_equal(q.collect(engine="streaming"), df) + + +@given( + df=dataframes( + max_size=0, + allow_null=False, + excluded_dtypes=[ + pl.Struct, + ], + ) +) +def test_single_empty(df: pl.DataFrame) -> None: + q = 
df.lazy().select(pl.all().single()) + with pytest.raises( + pl.exceptions.ComputeError, + match=r"aggregation 'single' expected a single value, got none", + ): + q.collect() + with pytest.raises( + pl.exceptions.ComputeError, + match=r"aggregation 'single' expected a single value, got none", + ): + q.collect(engine="streaming") + + +@given( + df=dataframes( + min_size=2, + allow_null=False, + excluded_dtypes=[ + pl.Struct, + ], + ) +) +def test_single_too_many(df: pl.DataFrame) -> None: + q = df.lazy().select(pl.all(ignore_nulls=False).single()) + with pytest.raises( + pl.exceptions.ComputeError, + match=rf"aggregation 'single' expected a single value, got {df.height} values", + ): + q.collect() + + with pytest.raises( + pl.exceptions.ComputeError, + match=rf"aggregation 'single' expected a single value, got {df.height} values", + ): + q.collect(engine="streaming") From 9975284ad8bc1139e2456c13efd41b0a7af61017 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 14:40:30 +0200 Subject: [PATCH 04/24] Some small polishing --- .../frame/group_by/aggregations/dispatch.rs | 29 ------------------- .../src/expressions/aggregation.rs | 4 +-- .../src/reduce/first_last_single.rs | 1 - py-polars/src/polars/_plr.pyi | 2 -- 4 files changed, 2 insertions(+), 34 deletions(-) diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index 58dc04ce6d73..6fc075b0b146 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -310,33 +310,4 @@ impl Series { }; s.restore_logical(out) } - - #[doc(hidden)] - pub unsafe fn agg_single(&self, groups: &GroupsType) -> Series { - // Prevent a rechunk for every individual group. 
- let s = if groups.len() > 1 { - self.rechunk() - } else { - self.clone() - }; - - let out = match groups { - GroupsType::Idx(groups) => { - let indices = groups - .all() - .iter() - .map(|idx| if idx.len() == 1 { Some(idx[0]) } else { None }) - .collect_ca(PlSmallStr::EMPTY); - s.take_unchecked(&indices) - }, - GroupsType::Slice { groups, .. } => { - let indices = groups - .iter() - .map(|&[first, len]| if len == 1 { Some(first) } else { None }) - .collect_ca(PlSmallStr::EMPTY); - s.take_unchecked(&indices) - }, - }; - s.restore_logical(out) - } } diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 9f6673142281..2d814a86b69d 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -357,14 +357,14 @@ impl PhysicalExpr for AggregationExpr { && n == 0 { return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got an empty group" + "aggregation 'single' expected a single value, got none" )); } if let Some(n) = gc && n > 1 { return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got a group with {n} values" + "aggregation 'single' expected a single value, got {n} values" )); } } diff --git a/crates/polars-expr/src/reduce/first_last_single.rs b/crates/polars-expr/src/reduce/first_last_single.rs index 598558f43ba2..aa3896aefd3a 100644 --- a/crates/polars-expr/src/reduce/first_last_single.rs +++ b/crates/polars-expr/src/reduce/first_last_single.rs @@ -438,7 +438,6 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< } unsafe { let mut buf = AnyValueBufferTrusted::new(&self.in_dtype, self.values.len()); - for v in core::mem::take(&mut self.values) { buf.add_unchecked_owned_physical(&v); } diff --git a/py-polars/src/polars/_plr.pyi b/py-polars/src/polars/_plr.pyi index 76cbf6cd5237..34243fe56f50 100644 --- a/py-polars/src/polars/_plr.pyi +++ b/py-polars/src/polars/_plr.pyi 
@@ -1942,8 +1942,6 @@ class PySelector: @staticmethod def last(strict: bool) -> PySelector: ... @staticmethod - def single() -> PySelector: ... - @staticmethod def matches(pattern: str) -> PySelector: ... @staticmethod def enum_() -> PySelector: ... From 8da2fe60e38f89fa480e29a7e77f74977d9b7fa7 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 15:04:06 +0200 Subject: [PATCH 05/24] Remove List.single() and use agg(F.element().single()) --- crates/polars-expr/src/dispatch/list.rs | 6 ------ .../polars-ops/src/chunked_array/list/namespace.rs | 12 ------------ crates/polars-plan/src/dsl/function_expr/list.rs | 2 -- crates/polars-plan/src/dsl/list.rs | 5 ----- .../src/plans/aexpr/function_expr/list.rs | 4 ---- .../src/plans/conversion/dsl_to_ir/functions.rs | 1 - crates/polars-plan/src/plans/conversion/ir_to_dsl.rs | 1 - crates/polars-python/src/expr/list.rs | 4 ---- py-polars/src/polars/expr/list.py | 4 ++-- 9 files changed, 2 insertions(+), 37 deletions(-) diff --git a/crates/polars-expr/src/dispatch/list.rs b/crates/polars-expr/src/dispatch/list.rs index 9c397d5495fd..796645bf25d0 100644 --- a/crates/polars-expr/src/dispatch/list.rs +++ b/crates/polars-expr/src/dispatch/list.rs @@ -34,7 +34,6 @@ pub fn function_expr_to_udf(func: IRListFunction) -> SpecialEq wrap!(slice), Shift => map_as_slice!(shift), Get(null_on_oob) => wrap!(get, null_on_oob), - Single => map!(single), #[cfg(feature = "list_gather")] Gather(null_on_oob) => map_as_slice!(gather, null_on_oob), #[cfg(feature = "list_gather")] @@ -255,11 +254,6 @@ pub(super) fn get(s: &mut [Column], null_on_oob: bool) -> PolarsResult { polars_ops::prelude::lst_get(ca, index, null_on_oob) } -pub(super) fn single(s: &Column) -> PolarsResult { - let list = s.list()?; - list.lst_single().map(Column::from) -} - #[cfg(feature = "list_gather")] pub(super) fn gather(args: &[Column], null_on_oob: bool) -> PolarsResult { let ca = &args[0]; diff --git a/crates/polars-ops/src/chunked_array/list/namespace.rs 
b/crates/polars-ops/src/chunked_array/list/namespace.rs index 7146164614e0..51a168254ef2 100644 --- a/crates/polars-ops/src/chunked_array/list/namespace.rs +++ b/crates/polars-ops/src/chunked_array/list/namespace.rs @@ -384,18 +384,6 @@ pub trait ListNameSpaceImpl: AsList { unsafe { s.from_physical_unchecked(ca.inner_dtype()) } } - fn lst_single(&self) -> PolarsResult { - let ca = self.as_list(); - if let Some(Some(n)) = ca - .downcast_iter() - .map(|arr| arr.offsets().lengths().find(|n| *n != 1)) - .next() - { - polars_bail!(ComputeError: "cannot unpack single value from list of length {n}"); - } - self.lst_get(0, false) - } - #[cfg(feature = "list_gather")] fn lst_gather_every(&self, n: &IdxCa, offset: &IdxCa) -> PolarsResult { let list_ca = self.as_list(); diff --git a/crates/polars-plan/src/dsl/function_expr/list.rs b/crates/polars-plan/src/dsl/function_expr/list.rs index 9c160a66518f..8f7eff83e59d 100644 --- a/crates/polars-plan/src/dsl/function_expr/list.rs +++ b/crates/polars-plan/src/dsl/function_expr/list.rs @@ -21,7 +21,6 @@ pub enum ListFunction { Slice, Shift, Get(bool), - Single, #[cfg(feature = "list_gather")] Gather(bool), #[cfg(feature = "list_gather")] @@ -81,7 +80,6 @@ impl Display for ListFunction { Slice => "slice", Shift => "shift", Get(_) => "get", - Single => "single", #[cfg(feature = "list_gather")] Gather(_) => "gather", #[cfg(feature = "list_gather")] diff --git a/crates/polars-plan/src/dsl/list.rs b/crates/polars-plan/src/dsl/list.rs index 02d5c4896f5d..8f19e3963ca4 100644 --- a/crates/polars-plan/src/dsl/list.rs +++ b/crates/polars-plan/src/dsl/list.rs @@ -143,11 +143,6 @@ impl ListNameSpace { ) } - pub fn single(self) -> Expr { - self.0 - .map_unary(FunctionExpr::ListExpr(ListFunction::Single)) - } - /// Get items in every sublist by multiple indexes. 
/// /// # Arguments diff --git a/crates/polars-plan/src/plans/aexpr/function_expr/list.rs b/crates/polars-plan/src/plans/aexpr/function_expr/list.rs index b73cb68d170d..557602b8a6e7 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/list.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/list.rs @@ -22,7 +22,6 @@ pub enum IRListFunction { Slice, Shift, Get(bool), - Single, #[cfg(feature = "list_gather")] Gather(bool), #[cfg(feature = "list_gather")] @@ -75,7 +74,6 @@ impl IRListFunction { Slice => mapper.ensure_is_list()?.with_same_dtype(), Shift => mapper.ensure_is_list()?.with_same_dtype(), Get(_) => mapper.ensure_is_list()?.map_to_list_and_array_inner_dtype(), - Single => mapper.ensure_is_list()?.map_to_list_and_array_inner_dtype(), #[cfg(feature = "list_gather")] Gather(_) => mapper.ensure_is_list()?.with_same_dtype(), #[cfg(feature = "list_gather")] @@ -188,7 +186,6 @@ impl IRListFunction { | L::Slice | L::Shift | L::Get(_) - | L::Single | L::Length | L::Max | L::Min @@ -243,7 +240,6 @@ impl Display for IRListFunction { Slice => "slice", Shift => "shift", Get(_) => "get", - Single => "single", #[cfg(feature = "list_gather")] Gather(_) => "gather", #[cfg(feature = "list_gather")] diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs index 66b8458e487e..6637a48a9154 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/functions.rs @@ -167,7 +167,6 @@ pub(super) fn convert_functions( L::Slice => IL::Slice, L::Shift => IL::Shift, L::Get(v) => IL::Get(v), - L::Single => IL::Single, #[cfg(feature = "list_gather")] L::Gather(v) => IL::Gather(v), #[cfg(feature = "list_gather")] diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index ec339c6df239..53f9577cdff8 100644 --- 
a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -349,7 +349,6 @@ pub fn ir_function_to_dsl(input: Vec, function: IRFunctionExpr) -> Expr { IL::Slice => L::Slice, IL::Shift => L::Shift, IL::Get(v) => L::Get(v), - IL::Single => L::Single, #[cfg(feature = "list_gather")] IL::Gather(v) => L::Gather(v), #[cfg(feature = "list_gather")] diff --git a/crates/polars-python/src/expr/list.rs b/crates/polars-python/src/expr/list.rs index f2d065ad5523..4ada018a82dd 100644 --- a/crates/polars-python/src/expr/list.rs +++ b/crates/polars-python/src/expr/list.rs @@ -70,10 +70,6 @@ impl PyExpr { .into() } - fn list_single(&self) -> Self { - self.inner.clone().list().single().into() - } - fn list_join(&self, separator: PyExpr, ignore_nulls: bool) -> Self { self.inner .clone() diff --git a/py-polars/src/polars/expr/list.py b/py-polars/src/polars/expr/list.py index cefd7110c58b..5f0f80c2a714 100644 --- a/py-polars/src/polars/expr/list.py +++ b/py-polars/src/polars/expr/list.py @@ -708,9 +708,9 @@ def single(self) -> Expr: >>> df = pl.DataFrame({"a": [[3, 2], [1], [2]]}) >>> df.select(pl.col("a").list.single()) Traceback (most recent call last): - ... ComputeError: cannot unpack single value from list of length 2 + ... ComputeError: aggregation 'single' expected a single value, got 2 values """ - return wrap_expr(self._pyexpr.list_single()) + return self.agg(F.element().single()) def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr: """ From 576546dd913301de023d450cd9fb28e332743f9a Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 15:41:49 +0200 Subject: [PATCH 06/24] Not sure why this doctest failed; does it work now? 
--- py-polars/src/polars/expr/list.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/py-polars/src/polars/expr/list.py b/py-polars/src/polars/expr/list.py index 5f0f80c2a714..a1588dc6dbd4 100644 --- a/py-polars/src/polars/expr/list.py +++ b/py-polars/src/polars/expr/list.py @@ -708,8 +708,9 @@ def single(self) -> Expr: >>> df = pl.DataFrame({"a": [[3, 2], [1], [2]]}) >>> df.select(pl.col("a").list.single()) Traceback (most recent call last): - ... ComputeError: aggregation 'single' expected a single value, got 2 values - """ + ... + polars.exceptions.ComputeError: aggregation 'single' expected a single value, got 2 values + """ # noqa: W505 return self.agg(F.element().single()) def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr: From be0bb1456a8a139cb3cc967ef091dbde2c0a19e7 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 16:40:39 +0200 Subject: [PATCH 07/24] Update dsl-schema-hashes --- crates/polars-plan/dsl-schema-hashes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-plan/dsl-schema-hashes.json b/crates/polars-plan/dsl-schema-hashes.json index a2c27e46c132..60917edb4129 100644 --- a/crates/polars-plan/dsl-schema-hashes.json +++ b/crates/polars-plan/dsl-schema-hashes.json @@ -1,5 +1,5 @@ { - "AggExpr": "5398ac46a31d511fa6c645556c45b3ebeba6544df2629cabac079230822b1130", + "AggExpr": "b1952c241a576472f6f9d93395eb558ccda487ce2d39a9fa0d5174adbaa51763", "AnonymousColumnsUdf": "04e8b658fac4f09f7f9607c73be6fd3fe258064dd33468710f2c3e188c281a69", "AnyValue": "ef2b7f7588918138f192b3545a8474915a90d211b7c786e642427b5cd565d4ef", "ArrayDataTypeFunction": "f6606e9a91efce34563b32adb32473cd19d8c1e9b184b102be72268d14306136", From 65f9c287cc77836c0797343801e0acde0a14948c Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 17:21:04 +0200 Subject: [PATCH 08/24] Update update_groups_while_evicting --- crates/polars-expr/src/reduce/first_last_single.rs | 6 ++++-- 1 
file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/polars-expr/src/reduce/first_last_single.rs b/crates/polars-expr/src/reduce/first_last_single.rs index aa3896aefd3a..d1cbd2a89c0c 100644 --- a/crates/polars-expr/src/reduce/first_last_single.rs +++ b/crates/polars-expr/src/reduce/first_last_single.rs @@ -376,16 +376,18 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< for (i, g) in subset.iter().zip(group_idxs) { let grp_val = self.values.get_unchecked_mut(g.idx()); let grp_seq = self.seqs.get_unchecked_mut(g.idx()); + let grp_count = self.counts.get_unchecked_mut(g.idx()); if g.should_evict() { self.evicted_values .push(core::mem::replace(grp_val, AnyValue::Null)); self.evicted_seqs.push(core::mem::replace(grp_seq, 0)); + self.evicted_counts.push(core::mem::replace(grp_count, 0)); } if P::should_replace(seq_id, *grp_seq) { *grp_val = values.get_unchecked(*i as usize).into_static(); *grp_seq = seq_id; } - self.counts[g.idx()] += 1; + *self.counts.get_unchecked_mut(g.idx()) += 1; } Ok(()) } @@ -409,7 +411,7 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< other.values.get_unchecked(si).clone(); *self.seqs.get_unchecked_mut(*g as usize) = *other.seqs.get_unchecked(si); } - *self.counts.get_unchecked_mut(*g as usize) += other.counts[si]; + *self.counts.get_unchecked_mut(*g as usize) += *other.counts.get_unchecked(si); } Ok(()) } From 0dfbfb54c8df68e8adeaabee4ec22fc207c54915 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Wed, 15 Oct 2025 17:30:03 +0200 Subject: [PATCH 09/24] Enable struct for the two error tests --- .../tests/unit/operations/aggregation/test_aggregations.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 977fa86dea60..5fb737dfbd44 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ 
b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -949,6 +949,7 @@ def test_invalid_agg_dtypes_should_raise( max_size=1, allow_null=False, excluded_dtypes=[ + # TODO(amber): This is broken, but also for .first() pl.Struct, ], ) @@ -963,9 +964,6 @@ def test_single(df: pl.DataFrame) -> None: df=dataframes( max_size=0, allow_null=False, - excluded_dtypes=[ - pl.Struct, - ], ) ) def test_single_empty(df: pl.DataFrame) -> None: @@ -986,9 +984,6 @@ def test_single_empty(df: pl.DataFrame) -> None: df=dataframes( min_size=2, allow_null=False, - excluded_dtypes=[ - pl.Struct, - ], ) ) def test_single_too_many(df: pl.DataFrame) -> None: From 992c60c94f625d8368fc0634f6fca4e95b1b8a6f Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 09:19:52 +0200 Subject: [PATCH 10/24] Set IRAggExpr::Single to not observe order --- .../src/plans/optimizer/set_order/expr_pushdown.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs b/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs index e686f1d48b3e..038150108c52 100644 --- a/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs +++ b/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs @@ -204,7 +204,8 @@ fn get_frame_observing_impl( | IRAggExpr::Sum(node) | IRAggExpr::Count { input: node, .. } | IRAggExpr::Std(node, _) - | IRAggExpr::Var(node, _) => { + | IRAggExpr::Var(node, _) + | IRAggExpr::Single(node) => { // Input order is deregarded, but must not observe order. _ = rec!(*node); O::None @@ -217,10 +218,7 @@ fn get_frame_observing_impl( }, // Input order observing aggregations. 
- IRAggExpr::Implode(node) - | IRAggExpr::First(node) - | IRAggExpr::Last(node) - | IRAggExpr::Single(node) => { + IRAggExpr::Implode(node) | IRAggExpr::First(node) | IRAggExpr::Last(node) => { if rec!(*node).has_frame_ordering() { return Err(FrameOrderObserved); } From 4f53cb0b57b9d00eb5aa7584911362f049998d6d Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 10:27:43 +0200 Subject: [PATCH 11/24] single: Add more tests --- .../aggregation/test_aggregations.py | 68 ++++++++++++------- .../tests/unit/operations/test_group_by.py | 2 + 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 5fb737dfbd44..46bf85cb3520 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -947,7 +947,6 @@ def test_invalid_agg_dtypes_should_raise( df=dataframes( min_size=1, max_size=1, - allow_null=False, excluded_dtypes=[ # TODO(amber): This is broken, but also for .first() pl.Struct, @@ -960,42 +959,59 @@ def test_single(df: pl.DataFrame) -> None: assert_frame_equal(q.collect(engine="streaming"), df) -@given( - df=dataframes( - max_size=0, - allow_null=False, - ) -) +@given(df=dataframes(max_size=0)) def test_single_empty(df: pl.DataFrame) -> None: q = df.lazy().select(pl.all().single()) - with pytest.raises( - pl.exceptions.ComputeError, - match=r"aggregation 'single' expected a single value, got none", - ): + match = "aggregation 'single' expected a single value, got none" + with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() - with pytest.raises( - pl.exceptions.ComputeError, - match=r"aggregation 'single' expected a single value, got none", - ): + with pytest.raises(pl.exceptions.ComputeError, match=match): + q.collect(engine="streaming") + + +@given(df=dataframes(min_size=2)) +def 
test_single_too_many(df: pl.DataFrame) -> None: + q = df.lazy().select(pl.all(ignore_nulls=False).single()) + match = f"aggregation 'single' expected a single value, got {df.height} values" + with pytest.raises(pl.exceptions.ComputeError, match=match): + q.collect() + with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect(engine="streaming") @given( df=dataframes( - min_size=2, + min_size=1, + max_size=1, allow_null=False, + excluded_dtypes=[ + # TODO(amber): This is broken, but also for .first() + pl.Struct, + ], ) ) -def test_single_too_many(df: pl.DataFrame) -> None: - q = df.lazy().select(pl.all(ignore_nulls=False).single()) - with pytest.raises( - pl.exceptions.ComputeError, - match=rf"aggregation 'single' expected a single value, got {df.height} values", - ): +def test_single_on_groups(df: pl.DataFrame) -> None: + df = df.with_columns(pl.col("col0").alias("key")) + q = df.lazy().group_by("col0").agg(pl.all(ignore_nulls=False).single()) + assert_frame_equal(q.collect(), df) + assert_frame_equal(q.collect(engine="streaming"), df) + + +def test_single_on_groups_empty() -> None: + df = pl.DataFrame({"col0": [[]]}) + q = df.lazy().select(pl.all().list.single()) + match = "aggregation 'single' expected a single value, got none" + with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() + with pytest.raises(pl.exceptions.ComputeError, match=match): + q.collect(engine="streaming") - with pytest.raises( - pl.exceptions.ComputeError, - match=rf"aggregation 'single' expected a single value, got {df.height} values", - ): + +def test_single_on_groups_too_many() -> None: + df = pl.DataFrame({"col0": [[1, 2, 3]]}) + q = df.lazy().select(pl.all().list.single()) + match = "aggregation 'single' expected a single value, got 3 values" + with pytest.raises(pl.exceptions.ComputeError, match=match): + q.collect() + with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect(engine="streaming") diff --git 
a/py-polars/tests/unit/operations/test_group_by.py b/py-polars/tests/unit/operations/test_group_by.py index 60624d415158..d6952916c43b 100644 --- a/py-polars/tests/unit/operations/test_group_by.py +++ b/py-polars/tests/unit/operations/test_group_by.py @@ -1032,6 +1032,7 @@ def test_schema_on_agg() -> None: pl.col("b").sum().alias("sum"), pl.col("b").first().alias("first"), pl.col("b").last().alias("last"), + pl.col("b").single().alias("single"), ) expected_schema = { "a": pl.String, @@ -1040,6 +1041,7 @@ def test_schema_on_agg() -> None: "sum": pl.Int64, "first": pl.Int64, "last": pl.Int64, + "single": pl.Int64, } assert result.collect_schema() == expected_schema From 0c45359881ea4ca42ef24aeae815c4681045f89b Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 10:49:59 +0200 Subject: [PATCH 12/24] Deprecate {DataFrame,Series}.item() in favor of .single() --- py-polars/src/polars/dataframe/frame.py | 36 +++++++++++++++++++------ py-polars/src/polars/series/series.py | 34 ++++++++++++++++++----- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/py-polars/src/polars/dataframe/frame.py b/py-polars/src/polars/dataframe/frame.py index d543242e3e28..f1d09f36d4d2 100644 --- a/py-polars/src/polars/dataframe/frame.py +++ b/py-polars/src/polars/dataframe/frame.py @@ -1647,6 +1647,11 @@ def collect_schema(self) -> Schema: """ return self.schema + @deprecated( + "`DataFrame.item` is deprecated; " + "for unpacking a single value out of a dataframe as a scalar, use `DataFrame.single()`; " + "for element retrieval, use `Dataframe[row, col]` instead; " + ) def item(self, row: int | None = None, column: int | str | None = None) -> Any: """ Return the DataFrame as a scalar, or return the element at the given row/column. 
@@ -1678,14 +1683,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: 6 """ if row is None and column is None: - if self.shape != (1, 1): - msg = ( - "can only call `.item()` if the dataframe is of shape (1, 1)," - " or if explicit row/col values are provided;" - f" frame has shape {self.shape!r}" - ) - raise ValueError(msg) - return self._df.to_series(0).get_index(0) + return self.single() elif row is None or column is None: msg = "cannot call `.item()` with only one of `row` or `column`" @@ -1698,6 +1696,28 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) + @unstable() + def single(self) -> Any: + """ + Return the single value in a 1x1 DataFrame as a scalar. + + This is equivalent to `df[0,0]`, with a check that the shape is (1,1). + + Examples + -------- + >>> df = pl.DataFrame({"a": [42]}) + >>> df.single() + 42 + """ + if self.shape != (1, 1): + msg = ( + "can only call `.single()` if the dataframe is of shape (1, 1)," + " or if explicit row/col values are provided;" + f" frame has shape {self.shape!r}" + ) + raise ValueError(msg) + return self._df.to_series(0).get_index(0) + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Table: """ diff --git a/py-polars/src/polars/series/series.py b/py-polars/src/polars/series/series.py index 8eb81acb8a96..cac137da4303 100644 --- a/py-polars/src/polars/series/series.py +++ b/py-polars/src/polars/series/series.py @@ -1608,6 +1608,11 @@ def _repr_html_(self) -> str: """Format output data in HTML for display in Jupyter Notebooks.""" return self.to_frame()._repr_html_(_from_series=True) + @deprecated( + "`Series.item` is deprecated; " + "for unpacking a single value out of a dataframe as a scalar, use `Series.single()`; " + "for element retrieval, use `Series[index]` instead; " + ) def item(self, index: int | None = None) -> Any: """ Return 
the Series as a scalar, or return the element at the given index. @@ -1625,16 +1630,31 @@ def item(self, index: int | None = None) -> Any: 24 """ if index is None: - if len(self) != 1: - msg = ( - "can only call '.item()' if the Series is of length 1," - f" or an explicit index is provided (Series is of length {len(self)})" - ) - raise ValueError(msg) - return self._s.get_index(0) + return self.single() return self._s.get_index_signed(index) + @unstable() + def single(self) -> Any: + """ + Return the single value in this Series as a scalar. + + This is equivalent to `s[0]`, with a check that the series length is 1. + + Examples + -------- + >>> s = pl.Series("a", [42]) + >>> s.single() + 42 + """ + if len(self) != 1: + msg = ( + "can only call '.item()' if the Series is of length 1," + f" or an explicit index is provided (Series is of length {len(self)})" + ) + raise ValueError(msg) + return self._s.get_index(0) + def estimated_size(self, unit: SizeUnit = "b") -> int | float: """ Return an estimation of the total (heap) allocated size of the Series. 
From 6f1da8c6c8e872d59a9c2d332fafb6c3c936a7df Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 12:24:49 +0200 Subject: [PATCH 13/24] Replace .item() with .single() in tests --- crates/polars-plan/src/dsl/format.rs | 2 +- py-polars/src/polars/dataframe/frame.py | 2 +- py-polars/src/polars/series/series.py | 18 +++--- .../unit/constructors/test_constructors.py | 2 +- py-polars/tests/unit/dataframe/test_df.py | 14 ++-- .../{test_item.py => test_single.py} | 24 +++++-- .../tests/unit/datatypes/test_categorical.py | 4 +- .../tests/unit/datatypes/test_decimal.py | 2 +- py-polars/tests/unit/datatypes/test_enum.py | 8 +-- py-polars/tests/unit/datatypes/test_float.py | 12 ++-- .../tests/unit/datatypes/test_integer.py | 4 +- py-polars/tests/unit/datatypes/test_object.py | 2 +- .../tests/unit/datatypes/test_temporal.py | 64 +++++++++---------- py-polars/tests/unit/expr/test_exprs.py | 6 +- py-polars/tests/unit/expr/test_literal.py | 12 ++-- .../functions/as_datatype/test_datetime.py | 16 ++--- .../functions/as_datatype/test_duration.py | 5 +- .../functions/range/test_datetime_range.py | 2 +- .../unit/functions/test_business_day_count.py | 2 +- .../tests/unit/functions/test_functions.py | 10 +-- py-polars/tests/unit/functions/test_lit.py | 18 +++--- .../tests/unit/functions/test_when_then.py | 2 +- .../tests/unit/interchange/test_column.py | 4 +- .../tests/unit/io/database/test_write.py | 2 +- py-polars/tests/unit/io/test_hive.py | 6 +- py-polars/tests/unit/io/test_io_plugin.py | 2 +- .../tests/unit/io/test_lazy_count_star.py | 10 +-- py-polars/tests/unit/io/test_lazy_parquet.py | 2 +- py-polars/tests/unit/io/test_parquet.py | 4 +- py-polars/tests/unit/io/test_scan.py | 2 +- .../tests/unit/io/test_scan_row_deletion.py | 4 +- .../tests/unit/lazyframe/test_collect_all.py | 4 +- .../tests/unit/lazyframe/test_lazyframe.py | 10 +-- .../lazyframe/test_order_observability.py | 4 +- py-polars/tests/unit/ml/test_torch.py | 2 +- .../aggregation/test_aggregations.py 
| 4 +- .../operations/aggregation/test_vertical.py | 2 +- .../namespaces/string/test_concat.py | 12 ++-- .../namespaces/string/test_string.py | 14 ++-- .../temporal/test_add_business_days.py | 2 +- .../namespaces/temporal/test_datetime.py | 18 +++--- .../temporal/test_month_start_end.py | 8 +-- .../namespaces/temporal/test_round.py | 60 ++++++++--------- .../namespaces/temporal/test_to_datetime.py | 4 +- .../namespaces/temporal/test_truncate.py | 8 +-- .../unit/operations/namespaces/test_binary.py | 4 +- .../operations/namespaces/test_strptime.py | 58 +++++++++-------- .../unit/operations/rolling/test_rolling.py | 6 +- py-polars/tests/unit/operations/test_cast.py | 48 +++++++------- .../tests/unit/operations/test_comparison.py | 6 +- .../tests/unit/operations/test_fill_null.py | 4 +- .../tests/unit/operations/test_has_nulls.py | 2 +- .../tests/unit/operations/test_index_of.py | 6 +- py-polars/tests/unit/operations/test_is_in.py | 4 +- py-polars/tests/unit/operations/test_join.py | 10 +-- .../tests/unit/operations/test_statistics.py | 2 +- .../tests/unit/operations/test_transpose.py | 2 +- .../unit/operations/test_value_counts.py | 2 +- .../tests/unit/operations/test_window.py | 8 ++- .../unit/operations/unique/test_n_unique.py | 2 +- .../unit/operations/unique/test_unique.py | 4 +- py-polars/tests/unit/series/test_series.py | 10 +-- .../series/{test_item.py => test_single.py} | 21 ++++-- py-polars/tests/unit/sql/test_literals.py | 2 +- py-polars/tests/unit/sql/test_strings.py | 2 +- py-polars/tests/unit/sql/test_structs.py | 2 +- .../tests/unit/sql/test_table_operations.py | 2 +- .../tests/unit/streaming/test_streaming.py | 2 +- .../unit/streaming/test_streaming_group_by.py | 2 +- py-polars/tests/unit/test_datatype_exprs.py | 44 ++++++------- py-polars/tests/unit/test_datatypes.py | 4 +- py-polars/tests/unit/test_expansion.py | 4 +- py-polars/tests/unit/test_format.py | 2 +- py-polars/tests/unit/test_polars_import.py | 2 +- py-polars/tests/unit/test_projections.py 
| 4 +- py-polars/tests/unit/test_row_encoding.py | 4 +- py-polars/tests/unit/test_scalar.py | 4 +- py-polars/tests/unit/test_selectors.py | 10 +-- 78 files changed, 374 insertions(+), 334 deletions(-) rename py-polars/tests/unit/dataframe/{test_item.py => test_single.py} (61%) rename py-polars/tests/unit/series/{test_item.py => test_single.py} (59%) diff --git a/crates/polars-plan/src/dsl/format.rs b/crates/polars-plan/src/dsl/format.rs index 18c89a3984ec..e080f8364a5d 100644 --- a/crates/polars-plan/src/dsl/format.rs +++ b/crates/polars-plan/src/dsl/format.rs @@ -113,7 +113,7 @@ impl fmt::Debug for Expr { Mean(expr) => write!(f, "{expr:?}.mean()"), First(expr) => write!(f, "{expr:?}.first()"), Last(expr) => write!(f, "{expr:?}.last()"), - Single(expr) => write!(f, "{expr:?}.item()"), + Single(expr) => write!(f, "{expr:?}.single()"), Implode(expr) => write!(f, "{expr:?}.list()"), NUnique(expr) => write!(f, "{expr:?}.n_unique()"), Sum(expr) => write!(f, "{expr:?}.sum()"), diff --git a/py-polars/src/polars/dataframe/frame.py b/py-polars/src/polars/dataframe/frame.py index f1d09f36d4d2..8124c53750b3 100644 --- a/py-polars/src/polars/dataframe/frame.py +++ b/py-polars/src/polars/dataframe/frame.py @@ -1648,7 +1648,7 @@ def collect_schema(self) -> Schema: return self.schema @deprecated( - "`DataFrame.item` is deprecated; " + "`DataFrame.item()` is deprecated; " "for unpacking a single value out of a dataframe as a scalar, use `DataFrame.single()`; " "for element retrieval, use `Dataframe[row, col]` instead; " ) diff --git a/py-polars/src/polars/series/series.py b/py-polars/src/polars/series/series.py index cac137da4303..4d6966b486f9 100644 --- a/py-polars/src/polars/series/series.py +++ b/py-polars/src/polars/series/series.py @@ -1341,7 +1341,7 @@ def __deepcopy__(self, memo: None = None) -> Self: def __contains__(self, item: Any) -> bool: if item is None: return self.has_nulls() - return self.implode().list.contains(item).item() + return 
self.implode().list.contains(item).single() def __iter__(self) -> Generator[Any]: if self.dtype in (List, Array): @@ -1609,7 +1609,7 @@ def _repr_html_(self) -> str: return self.to_frame()._repr_html_(_from_series=True) @deprecated( - "`Series.item` is deprecated; " + "`Series.item()` is deprecated; " "for unpacking a single value out of a dataframe as a scalar, use `Series.single()`; " "for element retrieval, use `Series[index]` instead; " ) @@ -1623,7 +1623,7 @@ def item(self, index: int | None = None) -> Any: Examples -------- >>> s1 = pl.Series("a", [1]) - >>> s1.item() + >>> s1.single() 1 >>> s2 = pl.Series("a", [9, 8, 7]) >>> s2.cum_sum().item(-1) @@ -1649,7 +1649,7 @@ def single(self) -> Any: """ if len(self) != 1: msg = ( - "can only call '.item()' if the Series is of length 1," + "can only call '.single()' if the Series is of length 1," f" or an explicit index is provided (Series is of length {len(self)})" ) raise ValueError(msg) @@ -2204,7 +2204,7 @@ def nan_max(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_max() nan """ - return self.to_frame().select_seq(F.col(self.name).nan_max()).item() + return self.to_frame().select_seq(F.col(self.name).nan_max()).single() def nan_min(self) -> int | float | date | datetime | timedelta | str: """ @@ -2223,7 +2223,7 @@ def nan_min(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_min() nan """ - return self.to_frame().select_seq(F.col(self.name).nan_min()).item() + return self.to_frame().select_seq(F.col(self.name).nan_min()).single() def std(self, ddof: int = 1) -> float | timedelta | None: """ @@ -2775,7 +2775,7 @@ def entropy(self, base: float = math.e, *, normalize: bool = True) -> float | No self.to_frame() .select_seq(F.col(self.name).entropy(base, normalize=normalize)) .to_series() - .item() + .single() ) @unstable() @@ -3736,7 +3736,7 @@ def search_sorted( elif _check_for_numpy(element) and isinstance(element, np.ndarray): return df.to_series() else: - return df.item() + 
return df.single() def unique(self, *, maintain_order: bool = False) -> Series: """ @@ -5136,7 +5136,7 @@ def index_of(self, element: IntoExpr) -> int | None: >>> s.index_of(55) is None True """ - return F.select(F.lit(self).index_of(element)).item() + return F.select(F.lit(self).index_of(element)).single() def clear(self, n: int = 0) -> Series: """ diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index cfcde1c18fef..ed420a8f2c27 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1273,7 +1273,7 @@ def test_from_rows_dtype() -> None: dc = _TestBazDC(d=datetime(2020, 2, 22), e=42.0, f="xyz") df = pl.DataFrame([[dc]], schema={"d": pl.Object}) assert df.schema == {"d": pl.Object} - assert df.item() == dc + assert df.single() == dc def test_from_dicts_schema() -> None: diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 121ab97c569d..ac81b5eb9896 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1061,7 +1061,7 @@ def test_is_nan_null_series() -> None: def test_len() -> None: df = pl.DataFrame({"nrs": [1, 2, 3]}) - assert cast("int", df.select(pl.col("nrs").len()).item()) == 3 + assert cast("int", df.select(pl.col("nrs").len()).single()) == 3 assert len(pl.DataFrame()) == 0 @@ -2543,10 +2543,10 @@ def test_fill_null_limits() -> None: def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None: res_expr = fruits_cars.select(pl.col("A").lower_bound()) - assert res_expr.item() == -9223372036854775808 + assert res_expr.single() == -9223372036854775808 res_expr = fruits_cars.select(pl.col("B").upper_bound()) - assert res_expr.item() == 9223372036854775807 + assert res_expr.single() == 9223372036854775807 with pytest.raises(ComputeError): fruits_cars.select(pl.col("fruits").upper_bound()) @@ -2921,7 
+2921,7 @@ def test_init_vs_strptime_consistency( pl.Datetime("us", dtype_time_zone) ) assert result_init.dtype == pl.Datetime("us", expected_time_zone) - assert result_init.item() == expected_item + assert result_init.single() == expected_item assert_series_equal(result_init, result_strptime) @@ -2929,12 +2929,12 @@ def test_init_vs_strptime_consistency_converts() -> None: result = pl.Series( [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))], dtype=pl.Datetime("us", "US/Pacific"), - ).item() + ).single() assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific")) result = ( pl.Series(["2020-01-01 00:00-08:00"]) .str.strptime(pl.Datetime("us", "US/Pacific")) - .item() + .single() ) assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific")) @@ -3103,7 +3103,7 @@ def test_round() -> None: def test_dot() -> None: df = pl.DataFrame({"a": [1.8, 1.2, 3.0], "b": [3.2, 1, 2]}) - assert df.select(pl.col("a").dot(pl.col("b"))).item() == 12.96 + assert df.select(pl.col("a").dot(pl.col("b"))).single() == 12.96 def test_unstack() -> None: diff --git a/py-polars/tests/unit/dataframe/test_item.py b/py-polars/tests/unit/dataframe/test_single.py similarity index 61% rename from py-polars/tests/unit/dataframe/test_item.py rename to py-polars/tests/unit/dataframe/test_single.py index 12f9d87c913f..3986fb2a5326 100644 --- a/py-polars/tests/unit/dataframe/test_item.py +++ b/py-polars/tests/unit/dataframe/test_single.py @@ -5,25 +5,36 @@ import polars as pl -def test_df_item() -> None: +def test_df_single() -> None: df = pl.DataFrame({"a": [1]}) - assert df.item() == 1 + assert df.single() == 1 + with pytest.warns(DeprecationWarning): + assert df.item() == 1 -def test_df_item_empty() -> None: +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_df_single_empty() -> None: df = pl.DataFrame() + with pytest.raises(ValueError, match=r".* frame has shape \(0, 0\)"): + df.single() with pytest.raises(ValueError, match=r".* frame 
has shape \(0, 0\)"): df.item() -def test_df_item_incorrect_shape_rows() -> None: +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_df_single_incorrect_shape_rows() -> None: df = pl.DataFrame({"a": [1, 2]}) + with pytest.raises(ValueError, match=r".* frame has shape \(2, 1\)"): + df.single() with pytest.raises(ValueError, match=r".* frame has shape \(2, 1\)"): df.item() -def test_df_item_incorrect_shape_columns() -> None: +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_df_single_incorrect_shape_columns() -> None: df = pl.DataFrame({"a": [1], "b": [2]}) + with pytest.raises(ValueError, match=r".* frame has shape \(1, 2\)"): + df.single() with pytest.raises(ValueError, match=r".* frame has shape \(1, 2\)"): df.item() @@ -42,12 +53,14 @@ def df() -> pl.DataFrame: (-2, "b", 5), ], ) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_with_indices( row: int, col: int | str, expected: int, df: pl.DataFrame ) -> None: assert df.item(row, col) == expected +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_with_single_index(df: pl.DataFrame) -> None: with pytest.raises(ValueError): df.item(0) @@ -60,6 +73,7 @@ def test_df_item_with_single_index(df: pl.DataFrame) -> None: @pytest.mark.parametrize( ("row", "col"), [(0, 10), (10, 0), (10, 10), (-10, 0), (-10, 10)] ) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_out_of_bounds(row: int, col: int, df: pl.DataFrame) -> None: with pytest.raises(IndexError, match="out of bounds"): df.item(row, col) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 444a7f85339a..8e58dfd4450c 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -466,7 +466,7 @@ def test_categorical_asof_join_by_arg() -> None: def test_categorical_list_get_item() -> None: - out = 
pl.Series([["a"]]).cast(pl.List(pl.Categorical)).item() + out = pl.Series([["a"]]).cast(pl.List(pl.Categorical)).single() assert isinstance(out, pl.Series) assert out.dtype == pl.Categorical @@ -538,7 +538,7 @@ def test_fast_unique_flag_from_arrow() -> None: ).with_columns([pl.col("colB").cast(pl.Categorical)]) filtered = df.to_arrow().filter([True, False, True, True, False, True, True, True]) - assert pl.from_arrow(filtered).select(pl.col("colB").n_unique()).item() == 4 # type: ignore[union-attr] + assert pl.from_arrow(filtered).select(pl.col("colB").n_unique()).single() == 4 # type: ignore[union-attr] def test_construct_with_null() -> None: diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index b963c67c9392..ffcbb9c5d6f2 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -157,7 +157,7 @@ def test_decimal_cast_no_scale() -> None: def test_decimal_scale_precision_roundtrip(monkeypatch: Any) -> None: - assert pl.from_arrow(pl.Series("dec", [D("10.0")]).to_arrow()).item() == D("10.0") + assert pl.from_arrow(pl.Series("dec", [D("10.0")]).to_arrow()).single() == D("10.0") def test_string_to_decimal() -> None: diff --git a/py-polars/tests/unit/datatypes/test_enum.py b/py-polars/tests/unit/datatypes/test_enum.py index 2722ff53fe2a..2b2ed97b27f5 100644 --- a/py-polars/tests/unit/datatypes/test_enum.py +++ b/py-polars/tests/unit/datatypes/test_enum.py @@ -547,8 +547,8 @@ def test_enum_cse_eq() -> None: .collect() ) - assert out["dt1"].item() == "a" - assert out["dt2"].item() == "a" + assert out["dt1"].single() == "a" + assert out["dt2"].single() == "a" assert out["dt1"].dtype == pl.Enum(["a", "b"]) assert out["dt2"].dtype == pl.Enum(["a", "c"]) assert out["dt1"].dtype != out["dt2"].dtype @@ -566,8 +566,8 @@ def test_category_comparison_subset() -> None: .collect() ) - assert out["dt1"].item() == "a" - assert out["dt2"].item() == "a" + assert 
out["dt1"].single() == "a" + assert out["dt2"].single() == "a" assert out["dt1"].dtype == pl.Enum(["a"]) assert out["dt2"].dtype == pl.Enum(["a", "b"]) assert out["dt1"].dtype != out["dt2"].dtype diff --git a/py-polars/tests/unit/datatypes/test_float.py b/py-polars/tests/unit/datatypes/test_float.py index 0ab3ca1584a2..c7f3bee88061 100644 --- a/py-polars/tests/unit/datatypes/test_float.py +++ b/py-polars/tests/unit/datatypes/test_float.py @@ -14,8 +14,12 @@ def test_nan_in_group_by_agg() -> None: } ) - assert df.group_by("bar", "key").agg(pl.col("value").max())["value"].item() == 18.78 - assert df.group_by("bar", "key").agg(pl.col("value").min())["value"].item() == 18.58 + assert ( + df.group_by("bar", "key").agg(pl.col("value").max())["value"].single() == 18.78 + ) + assert ( + df.group_by("bar", "key").agg(pl.col("value").min())["value"].single() == 18.58 + ) def test_nan_aggregations() -> None: @@ -142,8 +146,8 @@ def test_hash() -> None: ).hash() # check them against each other since hash is not stable - assert s.item(0) == s.item(1) # hash(-0.0) == hash(0.0) - assert s.item(2) == s.item(3) # hash(float('-nan')) == hash(float('nan')) + assert s[0] == s[1] # hash(-0.0) == hash(0.0) + assert s[2] == s[3] # hash(float('-nan')) == hash(float('nan')) def test_group_by_float() -> None: diff --git a/py-polars/tests/unit/datatypes/test_integer.py b/py-polars/tests/unit/datatypes/test_integer.py index ec649dd0a87e..ad0a9446b3d4 100644 --- a/py-polars/tests/unit/datatypes/test_integer.py +++ b/py-polars/tests/unit/datatypes/test_integer.py @@ -27,5 +27,5 @@ def test_int_negate_operation() -> None: def test_compare_zero_with_uint64_16798() -> None: df = pl.Series("a", [(1 << 63), 0], dtype=pl.UInt64).to_frame() - assert df.select(pl.col("a") >= 0).item(0, 0) - assert df.select(pl.col("a") == 0).item(0, 0) is False + assert df.select(pl.col("a") >= 0)[0, 0] + assert df.select(pl.col("a") == 0)[0, 0] is False diff --git a/py-polars/tests/unit/datatypes/test_object.py 
b/py-polars/tests/unit/datatypes/test_object.py index 9adb71bcaa65..7e2ca5c1c61c 100644 --- a/py-polars/tests/unit/datatypes/test_object.py +++ b/py-polars/tests/unit/datatypes/test_object.py @@ -243,7 +243,7 @@ def test_object_null_slice() -> None: @pytest.mark.may_fail_cloud # reason: Object type not supported def test_object_sort_scalar_19925() -> None: a = object() - assert pl.DataFrame({"a": [0], "obj": [a]}).sort("a")["obj"].item() == a + assert pl.DataFrame({"a": [0], "obj": [a]}).sort("a")["obj"].single() == a def test_object_estimated_size() -> None: diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index d57e62aec95b..8568c5b86b77 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -1312,7 +1312,7 @@ def test_replace_time_zone_from_to( time_unit: TimeUnit, ) -> None: ts = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime(time_unit)) - result = ts.dt.replace_time_zone(from_tz).dt.replace_time_zone(to_tz).item() + result = ts.dt.replace_time_zone(from_tz).dt.replace_time_zone(to_tz).single() expected = datetime(2020, 1, 1, 0, 0, tzinfo=tzinfo) assert result == expected @@ -1321,7 +1321,7 @@ def test_strptime_with_tz() -> None: result = ( pl.Series(["2020-01-01 03:00:00"]) .str.strptime(pl.Datetime("us", "Africa/Monrovia")) - .item() + .single() ) assert result == datetime(2020, 1, 1, 3, tzinfo=ZoneInfo("Africa/Monrovia")) @@ -1388,11 +1388,11 @@ def test_convert_time_zone_lazy_schema() -> None: def test_convert_time_zone_on_tz_naive() -> None: ts = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime) - result = ts.dt.convert_time_zone("Asia/Kathmandu").item() + result = ts.dt.convert_time_zone("Asia/Kathmandu").single() expected = datetime(2020, 1, 1, 5, 45, tzinfo=ZoneInfo("Asia/Kathmandu")) assert result == expected result = ( - ts.dt.replace_time_zone("UTC").dt.convert_time_zone("Asia/Kathmandu").item() + 
ts.dt.replace_time_zone("UTC").dt.convert_time_zone("Asia/Kathmandu").single() ) assert result == expected @@ -1495,7 +1495,7 @@ def test_replace_time_zone_ambiguous_with_ambiguous( ambiguous: Ambiguous, expected: datetime ) -> None: ts = pl.Series(["2018-10-28 02:30:00"]).str.strptime(pl.Datetime) - result = ts.dt.replace_time_zone("Europe/Brussels", ambiguous=ambiguous).item() + result = ts.dt.replace_time_zone("Europe/Brussels", ambiguous=ambiguous).single() assert result == expected @@ -1693,7 +1693,7 @@ def test_single_ambiguous_null() -> None: pl.col("ts").dt.replace_time_zone( "Europe/London", ambiguous=pl.col("ambiguous") ) - )["ts"].item() + )["ts"].single() assert result is None @@ -1702,7 +1702,7 @@ def test_unlocalize() -> None: tz_aware = tz_naive.dt.replace_time_zone("UTC").dt.convert_time_zone( "Europe/Brussels" ) - result = tz_aware.dt.replace_time_zone(None).item() + result = tz_aware.dt.replace_time_zone(None).single() assert result == datetime(2020, 1, 1, 4) @@ -1847,7 +1847,7 @@ def test_tz_aware_with_timezone_directive( ) -> None: tz_naive = pl.Series(["2020-01-01 03:00:00"]).str.strptime(pl.Datetime) tz_aware = tz_naive.dt.replace_time_zone(time_zone) - result = tz_aware.dt.to_string(directive).item() + result = tz_aware.dt.to_string(directive).single() assert result == expected @@ -2192,7 +2192,7 @@ def test_truncate_non_existent_14957() -> None: def test_cast_time_to_duration() -> None: assert pl.Series([time(hour=0, minute=0, second=2)]).cast( pl.Duration - ).item() == timedelta(seconds=2) + ).single() == timedelta(seconds=2) def test_tz_aware_day_weekday() -> None: @@ -2293,21 +2293,21 @@ def test_infer_iso8601_datetime(iso8601_format_datetime: str) -> None: .replace("%9f", "123456789") ) parsed = pl.Series([time_string]).str.strptime(pl.Datetime("ns")) - assert parsed.dt.year().item() == 2134 - assert parsed.dt.month().item() == 12 - assert parsed.dt.day().item() == 13 + assert parsed.dt.year().single() == 2134 + assert 
parsed.dt.month().single() == 12 + assert parsed.dt.day().single() == 13 if "%H" in iso8601_format_datetime: - assert parsed.dt.hour().item() == 1 + assert parsed.dt.hour().single() == 1 if "%M" in iso8601_format_datetime: - assert parsed.dt.minute().item() == 12 + assert parsed.dt.minute().single() == 12 if "%S" in iso8601_format_datetime: - assert parsed.dt.second().item() == 34 + assert parsed.dt.second().single() == 34 if "%9f" in iso8601_format_datetime: - assert parsed.dt.nanosecond().item() == 123456789 + assert parsed.dt.nanosecond().single() == 123456789 if "%6f" in iso8601_format_datetime: - assert parsed.dt.nanosecond().item() == 123456000 + assert parsed.dt.nanosecond().single() == 123456000 if "%3f" in iso8601_format_datetime: - assert parsed.dt.nanosecond().item() == 123000000 + assert parsed.dt.nanosecond().single() == 123000000 def test_infer_iso8601_tz_aware_datetime(iso8601_tz_aware_format_datetime: str) -> None: @@ -2325,21 +2325,21 @@ def test_infer_iso8601_tz_aware_datetime(iso8601_tz_aware_format_datetime: str) .replace("%#z", "+01:00") ) parsed = pl.Series([time_string]).str.strptime(pl.Datetime("ns")) - assert parsed.dt.year().item() == 2134 - assert parsed.dt.month().item() == 12 - assert parsed.dt.day().item() == 13 + assert parsed.dt.year().single() == 2134 + assert parsed.dt.month().single() == 12 + assert parsed.dt.day().single() == 13 if "%H" in iso8601_tz_aware_format_datetime: - assert parsed.dt.hour().item() == 1 + assert parsed.dt.hour().single() == 1 if "%M" in iso8601_tz_aware_format_datetime: - assert parsed.dt.minute().item() == 12 + assert parsed.dt.minute().single() == 12 if "%S" in iso8601_tz_aware_format_datetime: - assert parsed.dt.second().item() == 34 + assert parsed.dt.second().single() == 34 if "%9f" in iso8601_tz_aware_format_datetime: - assert parsed.dt.nanosecond().item() == 123456789 + assert parsed.dt.nanosecond().single() == 123456789 if "%6f" in iso8601_tz_aware_format_datetime: - assert 
parsed.dt.nanosecond().item() == 123456000 + assert parsed.dt.nanosecond().single() == 123456000 if "%3f" in iso8601_tz_aware_format_datetime: - assert parsed.dt.nanosecond().item() == 123000000 + assert parsed.dt.nanosecond().single() == 123000000 assert parsed.dtype == pl.Datetime("ns", "UTC") @@ -2351,9 +2351,9 @@ def test_infer_iso8601_date(iso8601_format_date: str) -> None: .replace("%d", "13") ) parsed = pl.Series([time_string]).str.strptime(pl.Date) - assert parsed.dt.year().item() == 2134 - assert parsed.dt.month().item() == 12 - assert parsed.dt.day().item() == 13 + assert parsed.dt.year().single() == 2134 + assert parsed.dt.month().single() == 12 + assert parsed.dt.day().single() == 13 def test_year_null_backed_by_out_of_range_15313() -> None: @@ -2438,7 +2438,7 @@ def test_weekday_vs_stdlib_datetime( pl.Series([value], dtype=pl.Datetime(time_unit)) .dt.replace_time_zone(time_zone, non_existent="null", ambiguous="null") .dt.weekday() - .item() + .single() ) if result is not None: expected = value.isoweekday() @@ -2449,7 +2449,7 @@ def test_weekday_vs_stdlib_datetime( value=st.dates(), ) def test_weekday_vs_stdlib_date(value: date) -> None: - result = pl.Series([value]).dt.weekday().item() + result = pl.Series([value]).dt.weekday().single() expected = value.isoweekday() assert result == expected diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py index ee91d8aeae42..69a38a46c8b6 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -101,7 +101,7 @@ def test_len_expr() -> None: out = df.select(pl.len()) assert out.shape == (1, 1) - assert cast(int, out.item()) == 5 + assert cast(int, out.single()) == 5 out = df.group_by("b", maintain_order=True).agg(pl.len()) assert out["b"].to_list() == ["a", "b"] @@ -518,11 +518,11 @@ def lit_series(value: Any, dtype: PolarsDataType | None) -> pl.Series: def test_lit_empty_tu() -> None: td = timedelta(1) - assert 
pl.select(pl.lit(td, dtype=pl.Duration)).item() == td + assert pl.select(pl.lit(td, dtype=pl.Duration)).single() == td assert pl.select(pl.lit(td, dtype=pl.Duration)).dtypes[0].time_unit == "us" # type: ignore[attr-defined] t = datetime(2023, 1, 1) - assert pl.select(pl.lit(t, dtype=pl.Datetime)).item() == t + assert pl.select(pl.lit(t, dtype=pl.Datetime)).single() == t assert pl.select(pl.lit(t, dtype=pl.Datetime)).dtypes[0].time_unit == "us" # type: ignore[attr-defined] diff --git a/py-polars/tests/unit/expr/test_literal.py b/py-polars/tests/unit/expr/test_literal.py index 3e6f5e59b4d8..5192e2bed24a 100644 --- a/py-polars/tests/unit/expr/test_literal.py +++ b/py-polars/tests/unit/expr/test_literal.py @@ -34,10 +34,10 @@ def test_literal_scalar_list_18686() -> None: def test_literal_integer_20807() -> None: for i in range(100): value = 2**i - assert pl.select(pl.lit(value)).item() == value - assert pl.select(pl.lit(-value)).item() == -value - assert pl.select(pl.lit(value, dtype=pl.Int128)).item() == value - assert pl.select(pl.lit(-value, dtype=pl.Int128)).item() == -value + assert pl.select(pl.lit(value)).single() == value + assert pl.select(pl.lit(-value)).single() == -value + assert pl.select(pl.lit(value, dtype=pl.Int128)).single() == value + assert pl.select(pl.lit(-value, dtype=pl.Int128)).single() == -value @pytest.mark.parametrize( @@ -58,7 +58,7 @@ def test_literal_datetime_timezone(tz: Any, lit_dtype: pl.DataType | None) -> No assert_frame_equal(df1, df2) assert df1.schema["dt"] == expected_dtype - assert df1.item() == value + assert df1.single() == value @pytest.mark.parametrize( @@ -107,7 +107,7 @@ def test_literal_datetime_timezone_utc_offset( for df in (df1, df2): assert df.schema["dt"] == expected_dtype - assert df.item() == expected_item + assert df.single() == expected_item def test_literal_datetime_timezone_utc_error() -> None: diff --git a/py-polars/tests/unit/functions/as_datatype/test_datetime.py 
b/py-polars/tests/unit/functions/as_datatype/test_datetime.py index 688ecc2ec7ce..cfffc993c653 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_datetime.py +++ b/py-polars/tests/unit/functions/as_datatype/test_datetime.py @@ -83,19 +83,19 @@ def test_datetime_invalid_time_component(components: list[int]) -> None: def test_datetime_time_unit(time_unit: TimeUnit) -> None: result = pl.datetime(2022, 1, 2, time_unit=time_unit) - assert pl.select(result.dt.year()).item() == 2022 - assert pl.select(result.dt.month()).item() == 1 - assert pl.select(result.dt.day()).item() == 2 + assert pl.select(result.dt.year()).single() == 2022 + assert pl.select(result.dt.month()).single() == 1 + assert pl.select(result.dt.day()).single() == 2 @pytest.mark.parametrize("time_zone", [None, "Europe/Amsterdam", "UTC"]) def test_datetime_time_zone(time_zone: str | None) -> None: result = pl.datetime(2022, 1, 2, 10, time_zone=time_zone) - assert pl.select(result.dt.year()).item() == 2022 - assert pl.select(result.dt.month()).item() == 1 - assert pl.select(result.dt.day()).item() == 2 - assert pl.select(result.dt.hour()).item() == 10 + assert pl.select(result.dt.year()).single() == 2022 + assert pl.select(result.dt.month()).single() == 1 + assert pl.select(result.dt.day()).single() == 2 + assert pl.select(result.dt.hour()).single() == 10 def test_datetime_ambiguous_time_zone() -> None: @@ -110,7 +110,7 @@ def test_datetime_ambiguous_time_zone_earliest() -> None: 2018, 10, 28, 2, 30, time_zone="Europe/Brussels", ambiguous="earliest" ) - result = pl.select(expr).item() + result = pl.select(expr).single() expected = datetime(2018, 10, 28, 2, 30, tzinfo=ZoneInfo("Europe/Brussels")) assert result == expected diff --git a/py-polars/tests/unit/functions/as_datatype/test_duration.py b/py-polars/tests/unit/functions/as_datatype/test_duration.py index 3f6f99770142..bd58e7e349d4 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_duration.py +++ 
b/py-polars/tests/unit/functions/as_datatype/test_duration.py @@ -39,10 +39,11 @@ def test_duration_time_units(time_unit: TimeUnit, expected: timedelta) -> None: ) ) assert result.collect_schema()["duration"] == pl.Duration(time_unit) - assert result.collect()["duration"].item() == expected + assert result.collect()["duration"].single() == expected if time_unit == "ns": assert ( - result.collect()["duration"].dt.total_nanoseconds().item() == 86523004005006 + result.collect()["duration"].dt.total_nanoseconds().single() + == 86523004005006 ) diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index d052900b1e45..668373a99f7e 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -602,7 +602,7 @@ def test_datetime_range_fast_slow_paths( unit: str, start: datetime, ) -> None: - end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).item() + end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).single() result_slow = pl.datetime_range( start, end, diff --git a/py-polars/tests/unit/functions/test_business_day_count.py b/py-polars/tests/unit/functions/test_business_day_count.py index 1a33a69c81af..32531ec636b5 100644 --- a/py-polars/tests/unit/functions/test_business_day_count.py +++ b/py-polars/tests/unit/functions/test_business_day_count.py @@ -161,7 +161,7 @@ def test_against_np_busday_count( "start", "end", week_mask=week_mask, holidays=holidays ) )["n"] - .item() + .single() ) expected = np.busday_count(start, end, weekmask=week_mask, holidays=holidays) if start > end and parse_version(np.__version__) < (1, 25): diff --git a/py-polars/tests/unit/functions/test_functions.py b/py-polars/tests/unit/functions/test_functions.py index c4bd4c87f4e1..7c786b4d53a0 100644 --- a/py-polars/tests/unit/functions/test_functions.py +++ b/py-polars/tests/unit/functions/test_functions.py @@ -237,7 
+237,7 @@ def test_cov() -> None: # expect same result from both approaches for idx, (r1, r2) in enumerate(zip(res1, res2)): expected_value = -645.8333333333 if idx == 0 else -1291.6666666666 - assert pytest.approx(expected_value) == r1.item() + assert pytest.approx(expected_value) == r1.single() assert_series_equal(r1, r2) @@ -260,7 +260,7 @@ def test_corr() -> None: # expect same result from both approaches for idx, (r1, r2) in enumerate(zip(res1, res2)): - assert pytest.approx(-0.412199756 if idx == 0 else -0.5) == r1.item() + assert pytest.approx(-0.412199756 if idx == 0 else -0.5) == r1.single() assert_series_equal(r1, r2) @@ -284,10 +284,12 @@ def test_null_handling_correlation() -> None: df1 = pl.DataFrame({"a": [None, 1, 2], "b": [None, 2, 1]}) df2 = pl.DataFrame({"a": [np.nan, 1, 2], "b": [np.nan, 2, 1]}) - assert np.isclose(df1.select(pl.corr("a", "b", method="spearman")).item(), -1.0) + assert np.isclose(df1.select(pl.corr("a", "b", method="spearman")).single(), -1.0) assert ( str( - df2.select(pl.corr("a", "b", method="spearman", propagate_nans=True)).item() + df2.select( + pl.corr("a", "b", method="spearman", propagate_nans=True) + ).single() ) == "nan" ) diff --git a/py-polars/tests/unit/functions/test_lit.py b/py-polars/tests/unit/functions/test_lit.py index ad3df8b9e422..1aa9140f5f04 100644 --- a/py-polars/tests/unit/functions/test_lit.py +++ b/py-polars/tests/unit/functions/test_lit.py @@ -145,9 +145,9 @@ class State(*EnumBase): # type: ignore[misc] pl.lit(value), pl.lit(value.value), # type: ignore[attr-defined] ): - assert pl.select(lit_value).item() == expected - assert df.filter(state=value).item() == expected - assert df.filter(state=lit_value).item() == expected + assert pl.select(lit_value).single() == expected + assert df.filter(state=value).single() == expected + assert df.filter(state=lit_value).single() == expected assert df.filter(pl.col("state") == State.QLD).is_empty() assert df.filter(pl.col("state") != State.QLD).height == 2 @@ 
-174,11 +174,11 @@ class Number(*EnumBase): # type: ignore[misc] result = pl.lit(value) assert pl.select(result).dtypes[0] == pl.Int32 - assert pl.select(result).item() == 1 + assert pl.select(result).single() == 1 result = pl.lit(value, dtype=pl.Int8) assert pl.select(result).dtypes[0] == pl.Int8 - assert pl.select(result).item() == 1 + assert pl.select(result).single() == 1 @given(value=datetimes("ns")) @@ -215,7 +215,7 @@ def test_lit_decimal() -> None: expr = pl.lit(value) df = pl.select(expr) - result = df.item() + result = df.single() assert df.dtypes[0] == pl.Decimal(None, 1) assert result == value @@ -226,7 +226,7 @@ def test_lit_string_float() -> None: expr = pl.lit(value, dtype=pl.Utf8) df = pl.select(expr) - result = df.item() + result = df.single() assert df.dtypes[0] == pl.String assert result == str(value) @@ -236,11 +236,11 @@ def test_lit_string_float() -> None: @given(s=series(min_size=1, max_size=1, allow_null=False, allowed_dtypes=pl.Decimal)) def test_lit_decimal_parametric(s: pl.Series) -> None: scale = s.dtype.scale # type: ignore[attr-defined] - value = s.item() + value = s.single() expr = pl.lit(value) df = pl.select(expr) - result = df.item() + result = df.single() assert df.dtypes[0] == pl.Decimal(None, scale) assert result == value diff --git a/py-polars/tests/unit/functions/test_when_then.py b/py-polars/tests/unit/functions/test_when_then.py index 43e646d09b0c..dcf4d60c9458 100644 --- a/py-polars/tests/unit/functions/test_when_then.py +++ b/py-polars/tests/unit/functions/test_when_then.py @@ -328,7 +328,7 @@ def test_single_element_broadcast( expected = df.select("x").head( df.select( pl.max_horizontal(mask_expr.len(), truthy_expr.len(), falsy_expr.len()) - ).item() + ).single() ) assert_frame_equal(result, expected) diff --git a/py-polars/tests/unit/interchange/test_column.py b/py-polars/tests/unit/interchange/test_column.py index abe592fe3e83..4b9aade0a126 100644 --- a/py-polars/tests/unit/interchange/test_column.py +++ 
b/py-polars/tests/unit/interchange/test_column.py @@ -251,8 +251,8 @@ def test_get_buffers_chunked_bitmask() -> None: col = PolarsColumn(s_chunked) chunks = list(col.get_chunks()) - assert chunks[0].get_buffers()["data"][0]._data.item() is True - assert chunks[1].get_buffers()["data"][0]._data.item() is False + assert chunks[0].get_buffers()["data"][0]._data.single() is True + assert chunks[1].get_buffers()["data"][0]._data.single() is False def test_get_buffers_string_zero_copy_fails() -> None: diff --git a/py-polars/tests/unit/io/database/test_write.py b/py-polars/tests/unit/io/database/test_write.py index 8cd5a4ee54f9..77130772eeec 100644 --- a/py-polars/tests/unit/io/database/test_write.py +++ b/py-polars/tests/unit/io/database/test_write.py @@ -301,7 +301,7 @@ def test_write_database_sa_rollback(tmp_path: str, pass_connection: bool) -> Non with Session(engine) as session: count = pl.read_database( query=f"select count(*) from {table_name}", connection=session - ).item(0, 0) + )[0, 0] assert isinstance(count, int) assert count == 0 diff --git a/py-polars/tests/unit/io/test_hive.py b/py-polars/tests/unit/io/test_hive.py index 90c98c68eb38..2e345616fb36 100644 --- a/py-polars/tests/unit/io/test_hive.py +++ b/py-polars/tests/unit/io/test_hive.py @@ -349,8 +349,8 @@ def test_hive_partition_directory_scan( ] # fmt: skip for df in dfs: - a = df.item(0, "a") - b = df.item(0, "b") + a = df[0, "a"] + b = df[0, "b"] path = tmp_path / f"a={a}/b={b}/data.bin" path.parent.mkdir(exist_ok=True, parents=True) write_func(df.drop("a", "b"), path) @@ -777,7 +777,7 @@ def test_hive_partition_filter_null_23005(tmp_path: Path) -> None: pl.any_horizontal(pl.col("date1", "date2").is_null()) & pl.col("path").str.contains("__HIVE_DEFAULT_PARTITION__") ).sum() - ).item() + ).single() == 2 ) diff --git a/py-polars/tests/unit/io/test_io_plugin.py b/py-polars/tests/unit/io/test_io_plugin.py index e67ed18d9a8b..45b917b8b682 100644 --- a/py-polars/tests/unit/io/test_io_plugin.py +++ 
b/py-polars/tests/unit/io/test_io_plugin.py @@ -180,7 +180,7 @@ def _source( # check the expression directly dt_val, column_cast = pushed_predicate.meta.pop() # Extract the datetime value from the expression - assert pl.DataFrame({}).select(dt_val).item() == cutoff + assert pl.DataFrame({}).select(dt_val).single() == cutoff column = column_cast.meta.pop()[0] assert column.meta == pl.col("timestamp") diff --git a/py-polars/tests/unit/io/test_lazy_count_star.py b/py-polars/tests/unit/io/test_lazy_count_star.py index 42e672c090e1..28b77c660bcb 100644 --- a/py-polars/tests/unit/io/test_lazy_count_star.py +++ b/py-polars/tests/unit/io/test_lazy_count_star.py @@ -43,7 +43,7 @@ def assert_fast_count( assert project_logs == {"project: 0"} assert result.schema == {expected_name: pl.get_index_type()} - assert result.item() == expected_count + assert result.single() == expected_count # Test effect of the environment variable monkeypatch.setenv("POLARS_FAST_FILE_COUNT_DISPATCH", "0") @@ -114,8 +114,10 @@ def test_count_csv_no_newline_on_last_22564() -> None: assert pl.scan_csv(data).collect().height == 3 assert pl.scan_csv(data, comment_prefix="#").collect().height == 3 - assert pl.scan_csv(data).select(pl.len()).collect().item() == 3 - assert pl.scan_csv(data, comment_prefix="#").select(pl.len()).collect().item() == 3 + assert pl.scan_csv(data).select(pl.len()).collect().single() == 3 + assert ( + pl.scan_csv(data, comment_prefix="#").select(pl.len()).collect().single() == 3 + ) @pytest.mark.write_disk @@ -229,7 +231,7 @@ def test_count_projection_pd( project_logs = set(re.findall(r"project: \d+", capture)) assert project_logs == {"project: 0"} - assert result.item() == 3 + assert result.single() == 3 def test_csv_scan_skip_lines_len_22889( diff --git a/py-polars/tests/unit/io/test_lazy_parquet.py b/py-polars/tests/unit/io/test_lazy_parquet.py index 4dbee96024a8..aee3efb42ca1 100644 --- a/py-polars/tests/unit/io/test_lazy_parquet.py +++ 
b/py-polars/tests/unit/io/test_lazy_parquet.py @@ -66,7 +66,7 @@ def test_row_index(foods_parquet_path: Path) -> None: def test_row_index_len_16543(foods_parquet_path: Path) -> None: q = pl.scan_parquet(foods_parquet_path).with_row_index() - assert q.select(pl.all()).select(pl.len()).collect().item() == 27 + assert q.select(pl.all()).select(pl.len()).collect().single() == 27 @pytest.mark.write_disk diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 8670c5356020..326686191760 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -573,7 +573,7 @@ def test_parquet_nano_second_schema() -> None: df = pd.DataFrame({"Time": [value]}) df.to_parquet(f) f.seek(0) - assert pl.read_parquet(f).item() == value + assert pl.read_parquet(f).single() == value def test_nested_struct_read_12610() -> None: @@ -2733,7 +2733,7 @@ def test_boolean_slice_pushdown_20314() -> None: s.to_frame().write_parquet(f) f.seek(0) - assert pl.scan_parquet(f).slice(2, 1).collect().item() + assert pl.scan_parquet(f).slice(2, 1).collect().single() def test_load_pred_pushdown_fsl_19241() -> None: diff --git a/py-polars/tests/unit/io/test_scan.py b/py-polars/tests/unit/io/test_scan.py index 88ab7f9efdc6..494437d5dcd3 100644 --- a/py-polars/tests/unit/io/test_scan.py +++ b/py-polars/tests/unit/io/test_scan.py @@ -918,7 +918,7 @@ def test_scan_csv_bytesio_memory_usage( pl.scan_csv(f) .filter(pl.col("mydata") == 999_999) .collect(engine="streaming" if streaming else "in-memory") - .item() + .single() == 999_999 ) # assert memory_usage.get_peak() - starting_memory < 1_000_000 diff --git a/py-polars/tests/unit/io/test_scan_row_deletion.py b/py-polars/tests/unit/io/test_scan_row_deletion.py index 60e3a333ac3e..7ed92d056726 100644 --- a/py-polars/tests/unit/io/test_scan_row_deletion.py +++ b/py-polars/tests/unit/io/test_scan_row_deletion.py @@ -97,7 +97,7 @@ def apply_row_index_offset(values: list[int]) -> list[int]: 
hive_partitioning=False, ).with_row_index(offset=row_index_offset) - assert q.select(pl.len()).collect().item() == 18 + assert q.select(pl.len()).collect().single() == 18 assert_frame_equal( q.collect(), @@ -389,7 +389,7 @@ def remove_data(path: Path) -> None: # Baseline: The metadata is readable but the row groups are not assert q.collect_schema() == {"physical_index": pl.UInt32} - assert q.select(pl.len()).collect().item() == 5 + assert q.select(pl.len()).collect().single() == 5 with pytest.raises(pl.exceptions.ComputeError, match="Invalid thrift"): q.collect() diff --git a/py-polars/tests/unit/lazyframe/test_collect_all.py b/py-polars/tests/unit/lazyframe/test_collect_all.py index 8922c96a0b2d..140fdf52d3cc 100644 --- a/py-polars/tests/unit/lazyframe/test_collect_all.py +++ b/py-polars/tests/unit/lazyframe/test_collect_all.py @@ -16,5 +16,5 @@ def test_collect_all(df: pl.DataFrame, optimizations: pl.QueryOptFlags) -> None: lf1 = df.lazy().select(pl.col("int").sum()) lf2 = df.lazy().select((pl.col("floats") * 2).sum()) out = pl.collect_all([lf1, lf2], optimizations=optimizations) - assert cast(int, out[0].item()) == 6 - assert cast(float, out[1].item()) == 12.0 + assert cast(int, out[0].single()) == 6 + assert cast(float, out[1].single()) == 12.0 diff --git a/py-polars/tests/unit/lazyframe/test_lazyframe.py b/py-polars/tests/unit/lazyframe/test_lazyframe.py index e6fa0e82ff5e..7a957c1fdafd 100644 --- a/py-polars/tests/unit/lazyframe/test_lazyframe.py +++ b/py-polars/tests/unit/lazyframe/test_lazyframe.py @@ -207,7 +207,7 @@ def test_filter_multiple_predicates() -> None: "predicate": ["==", ">", ">="], }, ) - assert ldf.filter(predicate="==").select("description").collect().item() == "eq" + assert ldf.filter(predicate="==").select("description").collect().single() == "eq" @pytest.mark.parametrize( @@ -490,7 +490,7 @@ def test_is_finite_is_infinite() -> None: def test_len() -> None: ldf = pl.LazyFrame({"nrs": [1, 2, 3]}) - assert cast(int, 
ldf.select(pl.col("nrs").len()).collect().item()) == 3 + assert cast(int, ldf.select(pl.col("nrs").len()).collect().single()) == 3 @pytest.mark.parametrize("dtype", NUMERIC_DTYPES) @@ -576,7 +576,7 @@ def test_dot() -> None: ldf = pl.LazyFrame({"a": [1.8, 1.2, 3.0], "b": [3.2, 1, 2]}).select( pl.col("a").dot(pl.col("b")) ) - assert cast(float, ldf.collect().item()) == 12.96 + assert cast(float, ldf.collect().single()) == 12.96 def test_sort() -> None: @@ -870,7 +870,7 @@ def test_float_floor_divide() -> None: x = 10.4 step = 0.5 ldf = pl.LazyFrame({"x": [x]}) - ldf_res = ldf.with_columns(pl.col("x") // step).collect().item() + ldf_res = ldf.with_columns(pl.col("x") // step).collect().single() assert ldf_res == x // step @@ -1499,7 +1499,7 @@ def test_unique_length_multiple_columns() -> None: "b": [100, 100, 200, 100, 300], } ) - assert lf.unique().select(pl.len()).collect().item() == 4 + assert lf.unique().select(pl.len()).collect().single() == 4 def test_asof_cross_join() -> None: diff --git a/py-polars/tests/unit/lazyframe/test_order_observability.py b/py-polars/tests/unit/lazyframe/test_order_observability.py index a145be6c4ed4..321d97c7b46e 100644 --- a/py-polars/tests/unit/lazyframe/test_order_observability.py +++ b/py-polars/tests/unit/lazyframe/test_order_observability.py @@ -541,7 +541,7 @@ def test_reverse_non_order_observe() -> None: plan = q.explain() assert "UNIQUE[maintain_order: false" in plan - assert q.collect().item() == 10 + assert q.collect().single() == 10 # Observing the order of the output of `reverse()` implicitly observes the # input to `reverse()`. @@ -554,7 +554,7 @@ def test_reverse_non_order_observe() -> None: plan = q.explain() assert "UNIQUE[maintain_order: true" in plan - assert q.collect().item() == 0 + assert q.collect().single() == 0 # Zipping `reverse()` must also consider the ordering of the input to # `reverse()`. 
diff --git a/py-polars/tests/unit/ml/test_torch.py b/py-polars/tests/unit/ml/test_torch.py index 735c0cb7a4be..fa8656fbc696 100644 --- a/py-polars/tests/unit/ml/test_torch.py +++ b/py-polars/tests/unit/ml/test_torch.py @@ -62,7 +62,7 @@ def test_to_torch_tensor(df: pl.DataFrame) -> None: t2 = df.to_torch("tensor") assert list(t1.shape) == [4, 3] - assert (t1 == t2).all().item() is True + assert (t1 == t2).all().single() is True def test_to_torch_dict(df: pl.DataFrame) -> None: diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 46bf85cb3520..4f820872974e 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -282,8 +282,8 @@ def test_sum_empty_and_null_set() -> None: {"a": [None, None, None], "b": [1, 1, 1]}, schema={"a": pl.Float32, "b": pl.Int64}, ) - assert df.select(pl.sum("a")).item() == 0.0 - assert df.group_by("b").agg(pl.sum("a"))["a"].item() == 0.0 + assert df.select(pl.sum("a")).single() == 0.0 + assert df.group_by("b").agg(pl.sum("a"))["a"].single() == 0.0 def test_horizontal_sum_null_to_identity() -> None: diff --git a/py-polars/tests/unit/operations/aggregation/test_vertical.py b/py-polars/tests/unit/operations/aggregation/test_vertical.py index fc74fdf59b65..073769ccda67 100644 --- a/py-polars/tests/unit/operations/aggregation/test_vertical.py +++ b/py-polars/tests/unit/operations/aggregation/test_vertical.py @@ -36,7 +36,7 @@ def test_all_expr() -> None: def test_any_expr(fruits_cars: pl.DataFrame) -> None: - assert fruits_cars.with_columns(pl.col("A").cast(bool)).select(pl.any("A")).item() + assert fruits_cars.with_columns(pl.col("A").cast(bool)).select(pl.any("A")).single() @pytest.mark.parametrize("function", ["all", "any"]) diff --git a/py-polars/tests/unit/operations/namespaces/string/test_concat.py 
b/py-polars/tests/unit/operations/namespaces/string/test_concat.py index 13ee591cd3a8..5025e84ef5f6 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_concat.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_concat.py @@ -27,10 +27,10 @@ def test_str_join2() -> None: df = pl.DataFrame({"foo": [1, None, 2, None]}) out = df.select(pl.col("foo").str.join(ignore_nulls=False)) - assert out.item() is None + assert out.single() is None out = df.select(pl.col("foo").str.join()) - assert out.item() == "12" + assert out.single() == "12" def test_str_join_all_null() -> None: @@ -50,14 +50,14 @@ def test_str_join_empty_list() -> None: def test_str_join_empty_list2() -> None: s = pl.Series([], dtype=pl.String) df = pl.DataFrame({"foo": s}) - result = df.select(pl.col("foo").str.join()).item() + result = df.select(pl.col("foo").str.join()).single() expected = "" assert result == expected def test_str_join_empty_list_agg_context() -> None: df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String}) - result = df.group_by("i").agg(pl.col("v").drop_nulls().str.join())["v"].item() + result = df.group_by("i").agg(pl.col("v").drop_nulls().str.join())["v"].single() expected = "" assert result == expected @@ -65,9 +65,9 @@ def test_str_join_empty_list_agg_context() -> None: def test_str_join_datetime() -> None: df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]}) out = df.select(pl.col("d").str.join("|", ignore_nulls=True)) - assert out.item() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" + assert out.single() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" out = df.select(pl.col("d").str.join("|", ignore_nulls=False)) - assert out.item() is None + assert out.single() is None def test_str_concat_deprecated() -> None: diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index 
a425792cd2ff..9084a0b4be33 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -327,7 +327,7 @@ def test_str_find_invalid_regex() -> None: df.with_columns(pl.col("txt").str.find(rx_invalid, strict=True)) res = df.with_columns(pl.col("txt").str.find(rx_invalid, strict=False)) - assert res.item() is None + assert res.single() is None def test_str_find_escaped_chars() -> None: @@ -1058,7 +1058,7 @@ def test_contains_any( expected == df["text"] .str.contains_any(pattern, ascii_case_insensitive=case_insensitive) - .item() + .single() ) # expr assert ( @@ -1067,7 +1067,7 @@ def test_contains_any( pl.col("text").str.contains_any( pattern, ascii_case_insensitive=case_insensitive ) - )["text"].item() + )["text"].single() ) # frame filter assert int(expected) == len( @@ -1282,7 +1282,7 @@ def test_replace_many( expected == df["text"] .str.replace_many(pattern, replacement, ascii_case_insensitive=case_insensitive) - .item() + .single() ) # expr assert ( @@ -1293,7 +1293,7 @@ def test_replace_many( replacement, ascii_case_insensitive=case_insensitive, ) - ).item() + ).single() ) @@ -1339,7 +1339,7 @@ def test_replace_many_mapping( expected == df["text"] .str.replace_many(mapping, ascii_case_insensitive=case_insensitive) - .item() + .single() ) # expr assert ( @@ -1349,7 +1349,7 @@ def test_replace_many_mapping( mapping, ascii_case_insensitive=case_insensitive, ) - ).item() + ).single() ) diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py index b7c77ef473bb..f1895bf0c67e 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py @@ -279,7 +279,7 @@ def test_against_np_busday_offset( n, week_mask=week_mask, holidays=holidays, roll=roll ) )["res"] - 
.item() + .single() ) expected = np.busday_offset( start, n, weekmask=week_mask, holidays=holidays, roll=roll diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index d752e9ed74d4..924dbd8f983a 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -106,7 +106,7 @@ def test_dt_date_and_time( attribute: str, time_zone: None | str, expected: date | time ) -> None: ser = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone) - result = getattr(ser.dt, attribute)().item() + result = getattr(ser.dt, attribute)().single() assert result == expected @@ -121,7 +121,7 @@ def test_dt_replace_time_zone_none(time_zone: str | None, time_unit: TimeUnit) - result = ser.dt.replace_time_zone(None) expected = datetime(2022, 1, 1, 23) assert result.dtype == pl.Datetime(time_unit, None) - assert result.item() == expected + assert result.single() == expected def test_dt_datetime_deprecated() -> None: @@ -130,7 +130,7 @@ def test_dt_datetime_deprecated() -> None: result = s.dt.datetime() expected = datetime(2022, 1, 1, 23) assert result.dtype == pl.Datetime(time_zone=None) - assert result.item() == expected + assert result.single() == expected @pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"]) @@ -175,7 +175,7 @@ def test_local_time_before_epoch(time_unit: TimeUnit) -> None: ser = pl.Series([datetime(1969, 7, 21, 2, 56, 2, 123000)]).dt.cast_time_unit( time_unit ) - result = ser.dt.time().item() + result = ser.dt.time().single() expected = time(2, 56, 2, 123000) assert result == expected @@ -1042,7 +1042,7 @@ def test_offset_by_expressions() -> None: def test_offset_by_saturating_8217_8474( duration: str, input_date: date, expected: date ) -> None: - result = pl.Series([input_date]).dt.offset_by(duration).item() + result = 
pl.Series([input_date]).dt.offset_by(duration).single() assert result == expected @@ -1463,7 +1463,7 @@ def test_literal_from_date( if dtype == pl.Datetime: tz = ZoneInfo(dtype.time_zone) if dtype.time_zone is not None else None # type: ignore[union-attr] value = datetime(value.year, value.month, value.day, tzinfo=tz) - assert out.item() == value + assert out.single() == value @pytest.mark.parametrize( @@ -1511,7 +1511,7 @@ def test_literal_from_datetime( value = value.replace(tzinfo=ZoneInfo(dtype.time_zone)) # type: ignore[union-attr] assert out.schema == OrderedDict({"literal": dtype}) - assert out.item() == value + assert out.single() == value @pytest.mark.parametrize( @@ -1526,7 +1526,7 @@ def test_literal_from_datetime( def test_literal_from_time(value: time) -> None: out = pl.select(pl.lit(value)) assert out.schema == OrderedDict({"literal": pl.Time}) - assert out.item() == value + assert out.single() == value @pytest.mark.parametrize( @@ -1550,4 +1550,4 @@ def test_literal_from_time(value: time) -> None: def test_literal_from_timedelta(value: time, dtype: pl.Duration | None) -> None: out = pl.select(pl.lit(value, dtype=dtype)) assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")}) - assert out.item() == value + assert out.single() == value diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py b/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py index f25192106090..8b99d0e0e406 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py @@ -39,7 +39,7 @@ def test_month_start_datetime( time_zone: str | None, ) -> None: ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit) - result = ser.dt.month_start().item() + result = ser.dt.month_start().single() assert result == expected.replace(tzinfo=tzinfo) @@ -52,7 +52,7 @@ def 
test_month_start_datetime( ) def test_month_start_date(dt: date, expected: date) -> None: ser = pl.Series([dt]) - result = ser.dt.month_start().item() + result = ser.dt.month_start().single() assert result == expected @@ -84,7 +84,7 @@ def test_month_end_datetime( time_zone: str | None, ) -> None: ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit) - result = ser.dt.month_end().item() + result = ser.dt.month_end().single() assert result == expected.replace(tzinfo=tzinfo) @@ -97,7 +97,7 @@ def test_month_end_datetime( ) def test_month_end_date(dt: date, expected: date) -> None: ser = pl.Series([dt]) - result = ser.dt.month_end().item() + result = ser.dt.month_end().single() assert result == expected diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py index 5fefcdaf5893..7d37dc655b16 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py @@ -169,9 +169,9 @@ def test_round_date() -> None: @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_round_datetime_simple(time_unit: TimeUnit) -> None: s = pl.Series([datetime(2020, 1, 2, 6)], dtype=pl.Datetime(time_unit)) - result = s.dt.round("1mo").item() + result = s.dt.round("1mo").single() assert result == datetime(2020, 1, 1) - result = s.dt.round("1d").item() + result = s.dt.round("1d").single() assert result == datetime(2020, 1, 2) @@ -197,14 +197,14 @@ def test_round_datetime_w_expression(time_unit: TimeUnit) -> None: def test_round_negative_towards_epoch_18239(time_unit: TimeUnit, expected: int) -> None: s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit)) s = s.dt.offset_by(f"-1{time_unit}") - result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item() + result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").single() assert result == expected 
result = ( s.dt.replace_time_zone("Europe/London") .dt.round(f"2{time_unit}") .dt.replace_time_zone(None) .dt.timestamp(time_unit="ns") - .item() + .single() ) assert result == expected @@ -222,14 +222,14 @@ def test_round_positive_away_from_epoch_18239( ) -> None: s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit)) s = s.dt.offset_by(f"1{time_unit}") - result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item() + result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").single() assert result == expected result = ( s.dt.replace_time_zone("Europe/London") .dt.round(f"2{time_unit}") .dt.replace_time_zone(None) .dt.timestamp(time_unit="ns") - .item() + .single() ) assert result == expected @@ -249,33 +249,33 @@ def test_round_unequal_length_22018(as_date: bool) -> None: def test_round_small() -> None: small = 1.234e-320 small_s = pl.Series([small]) - assert small_s.round().item() == 0.0 - assert small_s.round(320).item() == 1e-320 - assert small_s.round(321).item() == 1.2e-320 - assert small_s.round(322).item() == 1.23e-320 - assert small_s.round(323).item() == 1.234e-320 - assert small_s.round(324).item() == small - assert small_s.round(1000).item() == small - - assert small_s.round_sig_figs(1).item() == 1e-320 - assert small_s.round_sig_figs(2).item() == 1.2e-320 - assert small_s.round_sig_figs(3).item() == 1.23e-320 - assert small_s.round_sig_figs(4).item() == 1.234e-320 - assert small_s.round_sig_figs(5).item() == small - assert small_s.round_sig_figs(1000).item() == small + assert small_s.round().single() == 0.0 + assert small_s.round(320).single() == 1e-320 + assert small_s.round(321).single() == 1.2e-320 + assert small_s.round(322).single() == 1.23e-320 + assert small_s.round(323).single() == 1.234e-320 + assert small_s.round(324).single() == small + assert small_s.round(1000).single() == small + + assert small_s.round_sig_figs(1).single() == 1e-320 + assert small_s.round_sig_figs(2).single() == 1.2e-320 + assert 
small_s.round_sig_figs(3).single() == 1.23e-320 + assert small_s.round_sig_figs(4).single() == 1.234e-320 + assert small_s.round_sig_figs(5).single() == small + assert small_s.round_sig_figs(1000).single() == small def test_round_big() -> None: big = 1.234e308 max_err = big / 10**10 big_s = pl.Series([big]) - assert big_s.round().item() == big - assert big_s.round(1).item() == big - assert big_s.round(100).item() == big - - assert abs(big_s.round_sig_figs(1).item() - 1e308) <= max_err - assert abs(big_s.round_sig_figs(2).item() - 1.2e308) <= max_err - assert abs(big_s.round_sig_figs(3).item() - 1.23e308) <= max_err - assert abs(big_s.round_sig_figs(4).item() - 1.234e308) <= max_err - assert abs(big_s.round_sig_figs(4).item() - big) <= max_err - assert big_s.round_sig_figs(100).item() == big + assert big_s.round().single() == big + assert big_s.round(1).single() == big + assert big_s.round(100).single() == big + + assert abs(big_s.round_sig_figs(1).single() - 1e308) <= max_err + assert abs(big_s.round_sig_figs(2).single() - 1.2e308) <= max_err + assert abs(big_s.round_sig_figs(3).single() - 1.23e308) <= max_err + assert abs(big_s.round_sig_figs(4).single() - 1.234e308) <= max_err + assert abs(big_s.round_sig_figs(4).single() - big) <= max_err + assert big_s.round_sig_figs(100).single() == big diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py index 14e6d6e060f0..b9f05ca4cfc3 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py @@ -109,7 +109,7 @@ def test_to_datetime(datetimes: datetime, fmt: str) -> None: input = datetimes.strftime(fmt) expected = datetime.strptime(input, fmt) try: - result = pl.Series([input]).str.to_datetime(format=fmt).item() + result = pl.Series([input]).str.to_datetime(format=fmt).single() # If there's an exception, check that 
it's either: # - something which polars can't parse at all: missing day or month # - something on which polars intentionally raises @@ -196,7 +196,7 @@ def test_to_datetime_aware_values_aware_dtype() -> None: def test_to_datetime_two_digit_year_17213( inputs: str, format: str, expected: date ) -> None: - result = pl.Series([inputs]).str.to_date(format=format).item() + result = pl.Series([inputs]).str.to_date(format=format).single() assert result == expected diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py index ba802fb7c295..08a2cabad409 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py @@ -24,7 +24,7 @@ n=st.integers(min_value=1, max_value=100), ) def test_truncate_monthly(value: date, n: int) -> None: - result = pl.Series([value]).dt.truncate(f"{n}mo").item() + result = pl.Series([value]).dt.truncate(f"{n}mo").single() # manual calculation total = (value.year - 1970) * 12 + value.month - 1 remainder = total % n @@ -79,9 +79,9 @@ def test_truncate_date() -> None: @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_truncate_datetime_simple(time_unit: TimeUnit) -> None: s = pl.Series([datetime(2020, 1, 2, 6)], dtype=pl.Datetime(time_unit)) - result = s.dt.truncate("1mo").item() + result = s.dt.truncate("1mo").single() assert result == datetime(2020, 1, 1) - result = s.dt.truncate("1d").item() + result = s.dt.truncate("1d").single() assert result == datetime(2020, 1, 2) @@ -157,7 +157,7 @@ def test_truncate_origin_22590( .dt.replace_time_zone(time_zone) .dt.truncate(f"{multiplier}{unit}") .dt.replace_time_zone(None) - .item() + .single() ) assert result == expected, result diff --git a/py-polars/tests/unit/operations/namespaces/test_binary.py b/py-polars/tests/unit/operations/namespaces/test_binary.py index 9124fce068d5..8de08ab2fb3a 100644 --- 
a/py-polars/tests/unit/operations/namespaces/test_binary.py +++ b/py-polars/tests/unit/operations/namespaces/test_binary.py @@ -167,8 +167,8 @@ def test_compare_decode_between_lazy_and_eager_6814(encoding: TransferEncoding) def test_binary_size(sz: int, unit: SizeUnit, expected: int | float) -> None: df = pl.DataFrame({"data": [b"\x00" * sz]}, schema={"data": pl.Binary}) for sz in ( - df.select(sz=pl.col("data").bin.size(unit)).item(), # expr - df["data"].bin.size(unit).item(), # series + df.select(sz=pl.col("data").bin.size(unit)).single(), # expr + df["data"].bin.size(unit).single(), # series ): assert sz == expected diff --git a/py-polars/tests/unit/operations/namespaces/test_strptime.py b/py-polars/tests/unit/operations/namespaces/test_strptime.py index 051371cec8a5..6a22c25ba103 100644 --- a/py-polars/tests/unit/operations/namespaces/test_strptime.py +++ b/py-polars/tests/unit/operations/namespaces/test_strptime.py @@ -38,12 +38,12 @@ def test_str_strptime() -> None: def test_date_parse_omit_day() -> None: df = pl.DataFrame({"month": ["2022-01"]}) - assert df.select(pl.col("month").str.to_date(format="%Y-%m")).item() == date( + assert df.select(pl.col("month").str.to_date(format="%Y-%m")).single() == date( 2022, 1, 1 ) assert df.select( pl.col("month").str.to_datetime(format="%Y-%m") - ).item() == datetime(2022, 1, 1) + ).single() == datetime(2022, 1, 1) def test_to_datetime_precision() -> None: @@ -280,7 +280,7 @@ def test_to_datetime_dates_datetimes() -> None: ], ) def test_to_datetime_patterns_single(time_string: str, expected: str) -> None: - result = pl.Series([time_string]).str.to_datetime().item() + result = pl.Series([time_string]).str.to_datetime().single() assert result == expected @@ -290,7 +290,7 @@ def test_infer_tz_aware_time_unit(time_unit: TimeUnit) -> None: time_unit=time_unit ) assert result.dtype == pl.Datetime(time_unit, "UTC") - assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) + assert result.single() == 
datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) @@ -299,14 +299,14 @@ def test_infer_tz_aware_with_utc(time_unit: TimeUnit) -> None: time_unit=time_unit ) assert result.dtype == pl.Datetime(time_unit, "UTC") - assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) + assert result.single() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) def test_str_to_datetime_infer_tz_aware() -> None: result = ( pl.Series(["2020-01-02T04:00:00+02:00"]) .str.to_datetime(time_unit="us", time_zone="Europe/Vienna") - .item() + .single() ) assert result == datetime(2020, 1, 2, 3, tzinfo=ZoneInfo("Europe/Vienna")) @@ -412,13 +412,13 @@ def test_parse_negative_dates( s = pl.Series([ts]) result = s.str.to_datetime(format, time_unit="ms") # Python datetime.datetime doesn't support negative dates, so comparing - # with `result.item()` directly won't work. - assert result.dt.year().item() == exp_year - assert result.dt.month().item() == exp_month - assert result.dt.day().item() == exp_day - assert result.dt.hour().item() == exp_hour - assert result.dt.minute().item() == exp_minute - assert result.dt.second().item() == exp_second + # with `result.single()` directly won't work. 
+ assert result.dt.year().single() == exp_year + assert result.dt.month().single() == exp_month + assert result.dt.day().single() == exp_day + assert result.dt.hour().single() == exp_hour + assert result.dt.minute().single() == exp_minute + assert result.dt.second().single() == exp_second def test_short_formats() -> None: @@ -445,7 +445,7 @@ def test_strptime_abbrev_month( time_string: str, fmt: str, datatype: PolarsTemporalType, expected: date ) -> None: s = pl.Series([time_string]) - result = s.str.strptime(datatype, fmt).item() + result = s.str.strptime(datatype, fmt).single() assert result == expected @@ -538,7 +538,7 @@ def test_to_datetime_ambiguous_or_non_existent() -> None: ], ) def test_to_datetime_tz_aware_strptime(ts: str, fmt: str, expected: datetime) -> None: - result = pl.Series([ts]).str.to_datetime(fmt).item() + result = pl.Series([ts]).str.to_datetime(fmt).single() assert result == expected @@ -575,7 +575,7 @@ def test_crossing_dst_tz_aware(format: str) -> None: ) def test_strptime_subseconds_datetime(data: str, format: str, expected: time) -> None: s = pl.Series([data]) - result = s.str.to_datetime(format).item() + result = s.str.to_datetime(format).single() assert result == expected @@ -643,7 +643,7 @@ def test_strptime_incomplete_formats(string: str, fmt: str) -> None: ) def test_strptime_complete_formats(string: str, fmt: str, expected: datetime) -> None: # Similar to the above, but these formats are complete and should work - result = pl.Series([string]).str.to_datetime(fmt).item() + result = pl.Series([string]).str.to_datetime(fmt).single() assert result == expected @@ -676,8 +676,8 @@ def test_to_time_inferred(data: str, format: str, expected: time) -> None: def test_to_time_subseconds(data: str, format: str, expected: time) -> None: s = pl.Series([data]) for res in ( - s.str.to_time().item(), - s.str.to_time(format).item(), + s.str.to_time().single(), + s.str.to_time(format).single(), ): assert res == expected @@ -685,7 +685,7 @@ def 
test_to_time_subseconds(data: str, format: str, expected: time) -> None: def test_to_time_format_warning() -> None: s = pl.Series(["05:10:10.074000"]) with pytest.warns(ChronoFormatWarning, match=".%f"): - result = s.str.to_time("%H:%M:%S.%f").item() + result = s.str.to_time("%H:%M:%S.%f").single() assert result == time(5, 10, 10, 74) @@ -694,14 +694,14 @@ def test_to_datetime_ambiguous_earliest(exact: bool) -> None: result = ( pl.Series(["2020-10-25 01:00"]) .str.to_datetime(time_zone="Europe/London", ambiguous="earliest", exact=exact) - .item() + .single() ) expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")) assert result == expected result = ( pl.Series(["2020-10-25 01:00"]) .str.to_datetime(time_zone="Europe/London", ambiguous="latest", exact=exact) - .item() + .single() ) expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")) assert result == expected @@ -709,7 +709,7 @@ def test_to_datetime_ambiguous_earliest(exact: bool) -> None: pl.Series(["2020-10-25 01:00"]).str.to_datetime( time_zone="Europe/London", exact=exact, - ).item() + ).single() def test_to_datetime_naive_format_and_time_zone() -> None: @@ -731,7 +731,7 @@ def test_strptime_ambiguous_earliest(exact: bool) -> None: .str.strptime( pl.Datetime("us", "Europe/London"), ambiguous="earliest", exact=exact ) - .item() + .single() ) expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")) assert result == expected @@ -740,7 +740,7 @@ def test_strptime_ambiguous_earliest(exact: bool) -> None: .str.strptime( pl.Datetime("us", "Europe/London"), ambiguous="latest", exact=exact ) - .item() + .single() ) expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")) assert result == expected @@ -748,7 +748,7 @@ def test_strptime_ambiguous_earliest(exact: bool) -> None: pl.Series(["2020-10-25 01:00"]).str.strptime( pl.Datetime("us", "Europe/London"), exact=exact, - ).item() + ).single() @pytest.mark.parametrize("time_unit", 
["ms", "us", "ns"]) @@ -757,7 +757,9 @@ def test_to_datetime_out_of_range_13401(time_unit: TimeUnit) -> None: with pytest.raises(InvalidOperationError, match="conversion .* failed"): s.str.to_datetime("%Y-%B-%d %H:%M:%S", time_unit=time_unit) assert ( - s.str.to_datetime("%Y-%B-%d %H:%M:%S", strict=False, time_unit=time_unit).item() + s.str.to_datetime( + "%Y-%B-%d %H:%M:%S", strict=False, time_unit=time_unit + ).single() is None ) @@ -822,7 +824,7 @@ def test_strptime_empty_input_22214() -> None: ) def test_matching_strings_but_different_format_22495(value: str) -> None: s = pl.Series("my_strings", [value]) - result = s.str.to_date("%Y-%m-%d", strict=False).item() + result = s.str.to_date("%Y-%m-%d", strict=False).single() assert result is None diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index 848323b36e4b..c39878a08c18 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -1100,10 +1100,10 @@ def test_rolling_median_2() -> None: # this can differ because simd sizes and non-associativity of floats. 
assert df.select( pl.col("x").rolling_median(window_size=10).sum() - ).item() == pytest.approx(5.139429061527812) + ).single() == pytest.approx(5.139429061527812) assert df.select( pl.col("x").rolling_median(window_size=100).sum() - ).item() == pytest.approx(26.60506093611384) + ).single() == pytest.approx(26.60506093611384) @pytest.mark.parametrize( @@ -1801,5 +1801,5 @@ def test_rolling_rank_method_random( ).all() ) .collect() - .item() + .single() ) diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py index a354572bcdbd..e723d2b509f0 100644 --- a/py-polars/tests/unit/operations/test_cast.py +++ b/py-polars/tests/unit/operations/test_cast.py @@ -180,7 +180,7 @@ def _cast_series( dtype_out: PolarsDataType, strict: bool, ) -> int | datetime | date | time | timedelta | None: - return pl.Series("a", [val], dtype=dtype_in).cast(dtype_out, strict=strict).item() # type: ignore[no-any-return] + return pl.Series("a", [val], dtype=dtype_in).cast(dtype_out, strict=strict).single() # type: ignore[no-any-return] def _cast_expr( @@ -193,7 +193,7 @@ def _cast_expr( pl.Series("a", [val], dtype=dtype_in) .to_frame() .select(pl.col("a").cast(dtype_out, strict=strict)) - .item() + .single() ) @@ -203,7 +203,9 @@ def _cast_lit( dtype_out: PolarsDataType, strict: bool, ) -> int | datetime | date | time | timedelta | None: - return pl.select(pl.lit(val, dtype=dtype_in).cast(dtype_out, strict=strict)).item() # type: ignore[no-any-return] + return ( # type: ignore[no-any-return] + pl.select(pl.lit(val, dtype=dtype_in).cast(dtype_out, strict=strict)).single() + ) @pytest.mark.parametrize( @@ -364,13 +366,13 @@ def test_strict_cast_temporal( args = [value, from_dtype, to_dtype, True] if should_succeed: out = _cast_series_t(*args) # type: ignore[arg-type] - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] - assert out.item() == 
expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype else: with pytest.raises(InvalidOperationError): @@ -436,23 +438,23 @@ def test_cast_temporal( args = [value, from_dtype, to_dtype, False] out = _cast_series_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.item() is None + assert out.single() is None else: - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.item() is None + assert out.single() is None else: - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.item() is None + assert out.single() is None else: - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype @@ -486,23 +488,23 @@ def test_cast_string( args = [value, from_dtype, to_dtype, False] out = _cast_series_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.item() is None + assert out.single() is None else: - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.item() is None + assert out.single() is None else: - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.item() is None + assert out.single() is None else: - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype @@ -538,13 +540,13 @@ def 
test_strict_cast_string( args = [value, from_dtype, to_dtype, True] if should_succeed: out = _cast_series_t(*args) # type: ignore[arg-type] - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] - assert out.item() == expected_value + assert out.single() == expected_value assert out.dtype == to_dtype else: with pytest.raises(InvalidOperationError): @@ -704,14 +706,14 @@ def test_all_null_cast_5826() -> None: df = pl.DataFrame(data=[pl.Series("a", [None], dtype=pl.String)]) out = df.with_columns(pl.col("a").cast(pl.Boolean)) assert out.dtypes == [pl.Boolean] - assert out.item() is None + assert out.single() is None @pytest.mark.parametrize("dtype", INTEGER_DTYPES) def test_bool_numeric_supertype(dtype: PolarsDataType) -> None: df = pl.DataFrame({"v": [1, 2, 3, 4, 5, 6]}) result = df.select((pl.col("v") < 3).sum().cast(dtype) / pl.len()) - assert result.item() - 0.3333333 <= 0.00001 + assert result.single() - 0.3333333 <= 0.00001 @pytest.mark.parametrize("dtype", [pl.String(), pl.String, str]) diff --git a/py-polars/tests/unit/operations/test_comparison.py b/py-polars/tests/unit/operations/test_comparison.py index 2634971d3e15..4371a3cccb33 100644 --- a/py-polars/tests/unit/operations/test_comparison.py +++ b/py-polars/tests/unit/operations/test_comparison.py @@ -140,7 +140,7 @@ def test_offset_handling_arg_where_7863() -> None: assert ( df_check.select((pl.lit(0).append(pl.col("a")).append(0)) != 0) .select(pl.col("literal").arg_true()) - .item() + .single() == 2 ) @@ -456,10 +456,10 @@ def test_schema_ne_missing_9256() -> None: def test_nested_binary_literal_super_type_12227() -> None: # The `.alias` is important here to trigger the bug. 
result = pl.select(x=1).select((pl.lit(0) + ((pl.col("x") > 0) * 0.1)).alias("x")) - assert result.item() == 0.1 + assert result.single() == 0.1 result = pl.select((pl.lit(0) + (pl.lit(0) == pl.lit(0)) * pl.lit(0.1)) + pl.lit(0)) - assert result.item() == 0.1 + assert result.single() == 0.1 def test_struct_broadcasting_comparison() -> None: diff --git a/py-polars/tests/unit/operations/test_fill_null.py b/py-polars/tests/unit/operations/test_fill_null.py index bc3099ef1a2a..ff2a79d3e712 100644 --- a/py-polars/tests/unit/operations/test_fill_null.py +++ b/py-polars/tests/unit/operations/test_fill_null.py @@ -44,7 +44,7 @@ def test_fill_null_non_lit() -> None: "d": pl.Series([None, 2], dtype=pl.Decimal(10, 2)), } ) - assert df.fill_null(0).select(pl.all().null_count()).transpose().sum().item() == 0 + assert df.fill_null(0).select(pl.all().null_count()).transpose().sum().single() == 0 def test_fill_null_f32_with_lit() -> None: @@ -62,7 +62,7 @@ def test_fill_null_lit_() -> None: } ) assert ( - df.fill_null(pl.lit(0)).select(pl.all().null_count()).transpose().sum().item() + df.fill_null(pl.lit(0)).select(pl.all().null_count()).transpose().sum().single() == 0 ) diff --git a/py-polars/tests/unit/operations/test_has_nulls.py b/py-polars/tests/unit/operations/test_has_nulls.py index 7a78c9a09ff8..77152412b6d1 100644 --- a/py-polars/tests/unit/operations/test_has_nulls.py +++ b/py-polars/tests/unit/operations/test_has_nulls.py @@ -13,7 +13,7 @@ def test_has_nulls_series_no_nulls(s: pl.Series) -> None: @given(df=dataframes(allow_null=False)) def test_has_nulls_expr_no_nulls(df: pl.DataFrame) -> None: result = df.select(pl.all().has_nulls()) - assert result.select(pl.any_horizontal(df.columns)).item() is False + assert result.select(pl.any_horizontal(df.columns)).single() is False @given( diff --git a/py-polars/tests/unit/operations/test_index_of.py b/py-polars/tests/unit/operations/test_index_of.py index 95baaac1882d..049548b4da8e 100644 --- 
a/py-polars/tests/unit/operations/test_index_of.py +++ b/py-polars/tests/unit/operations/test_index_of.py @@ -126,8 +126,8 @@ def test_integer(dtype: IntegerType) -> None: 3, None, 4, - pl.select(dtype_max).item(), - pl.select(dtype_min).item(), + pl.select(dtype_max).single(), + pl.select(dtype_min).single(), ] series = pl.Series(values, dtype=dtype) sorted_series_asc = series.sort(descending=False) @@ -136,7 +136,7 @@ def test_integer(dtype: IntegerType) -> None: [pl.Series([100, 7], dtype=dtype), series], rechunk=False ) - extra_values = [pl.select(v).item() for v in [dtype_max - 1, dtype_min + 1]] + extra_values = [pl.select(v).single() for v in [dtype_max - 1, dtype_min + 1]] for s in [series, sorted_series_asc, sorted_series_desc, chunked_series]: value: IntoExpr for value in values: diff --git a/py-polars/tests/unit/operations/test_is_in.py b/py-polars/tests/unit/operations/test_is_in.py index 71fb7886d069..47312a0497cd 100644 --- a/py-polars/tests/unit/operations/test_is_in.py +++ b/py-polars/tests/unit/operations/test_is_in.py @@ -160,7 +160,7 @@ def test_is_in_struct() -> None: def test_is_in_null_prop() -> None: - assert pl.Series([None], dtype=pl.Float32).is_in(pl.Series([42])).item() is None + assert pl.Series([None], dtype=pl.Float32).is_in(pl.Series([42])).single() is None assert pl.Series([{"a": None}, None], dtype=pl.Struct({"a": pl.Float32})).is_in( pl.Series([{"a": 42}], dtype=pl.Struct({"a": pl.Float32})) ).to_list() == [False, None] @@ -171,7 +171,7 @@ def test_is_in_null_prop() -> None: def test_is_in_9070() -> None: - assert not pl.Series([1]).is_in(pl.Series([1.99])).item() + assert not pl.Series([1]).is_in(pl.Series([1.99])).single() def test_is_in_float_list_10764() -> None: diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py index eb157c42be61..d23555a19cf4 100644 --- a/py-polars/tests/unit/operations/test_join.py +++ b/py-polars/tests/unit/operations/test_join.py @@ -222,7 +222,7 @@ 
def test_right_join_schema_maintained_22516() -> None: .collect() ) - assert lazy_join.item() == eager_join.item() + assert lazy_join.single() == eager_join.single() def test_join() -> None: @@ -1737,7 +1737,7 @@ def test_select_after_join_where_20831() -> None: pl.Series("d", [None, None, 7, 8, 8, 8]).to_frame(), ) - assert q.select(pl.len()).collect().item() == 6 + assert q.select(pl.len()).collect().single() == 6 q = ( left.join(right, how="cross") @@ -1750,7 +1750,7 @@ def test_select_after_join_where_20831() -> None: pl.Series("d", [None, None, 7, 8, 8, 8]).to_frame(), ) - assert q.select(pl.len()).collect().item() == 6 + assert q.select(pl.len()).collect().single() == 6 @pytest.mark.parametrize( @@ -1871,7 +1871,7 @@ def test_select_len_after_semi_anti_join_21343() -> None: q = lhs.join(rhs, on="a", how="anti").select(pl.len()) - assert q.collect().item() == 0 + assert q.collect().single() == 0 def test_multi_leftjoin_empty_right_21701() -> None: @@ -3662,7 +3662,7 @@ def test_join_rewrite_null_preserving_exprs( .select(expr_func(pl.first())) .select(pl.first().is_null() | ~pl.first()) .to_series() - .item() + .single() ) q = lhs.join(rhs, on="a", how="left", maintain_order="left_right").filter( diff --git a/py-polars/tests/unit/operations/test_statistics.py b/py-polars/tests/unit/operations/test_statistics.py index 3df6ad7a8e7d..8e4104b70594 100644 --- a/py-polars/tests/unit/operations/test_statistics.py +++ b/py-polars/tests/unit/operations/test_statistics.py @@ -67,7 +67,7 @@ def test_cov_corr_f32_type() -> None: def test_cov(fruits_cars: pl.DataFrame) -> None: ldf = fruits_cars.lazy() for cov_ab in (pl.cov(pl.col("A"), pl.col("B")), pl.cov("A", "B")): - assert cast(float, ldf.select(cov_ab).collect().item()) == -2.5 + assert cast(float, ldf.select(cov_ab).collect().single()) == -2.5 def test_std(fruits_cars: pl.DataFrame) -> None: diff --git a/py-polars/tests/unit/operations/test_transpose.py b/py-polars/tests/unit/operations/test_transpose.py index 
591cea081909..7cc0486ee08e 100644 --- a/py-polars/tests/unit/operations/test_transpose.py +++ b/py-polars/tests/unit/operations/test_transpose.py @@ -195,4 +195,4 @@ def test_transpose_multiple_chunks() -> None: def test_nested_struct_transpose_21923() -> None: df = pl.DataFrame({"x": [{"a": {"b": 1, "c": 2}}]}) - assert df.transpose().item() == df.item() + assert df.transpose().single() == df.single() diff --git a/py-polars/tests/unit/operations/test_value_counts.py b/py-polars/tests/unit/operations/test_value_counts.py index 7b1c2e25cf74..d306a459e2ea 100644 --- a/py-polars/tests/unit/operations/test_value_counts.py +++ b/py-polars/tests/unit/operations/test_value_counts.py @@ -80,7 +80,7 @@ def test_value_counts_duplicate_name() -> None: df = pl.DataFrame({"a": [None, 1, None, 2, 3]}) result = df.select(pl.col("a").count()) - assert result.item() == 3 + assert result.single() == 3 result = df.group_by(1).agg(pl.col("a").count()) assert result.to_dict(as_series=False) == {"literal": [1], "a": [3]} diff --git a/py-polars/tests/unit/operations/test_window.py b/py-polars/tests/unit/operations/test_window.py index 59b41fad5b88..654f0afc84a9 100644 --- a/py-polars/tests/unit/operations/test_window.py +++ b/py-polars/tests/unit/operations/test_window.py @@ -510,7 +510,7 @@ def test_window_chunked_std_17102() -> None: df = pl.concat([c1, c2], rechunk=False) out = df.select(pl.col("B").std().over("A").alias("std")) - assert out.unique().item() == 0.7071067811865476 + assert out.unique().single() == 0.7071067811865476 def test_window_17308() -> None: @@ -548,8 +548,10 @@ def test_order_by_sorted_keys_18943() -> None: def test_nested_window_keys() -> None: df = pl.DataFrame({"x": 1, "y": "two"}) - assert df.select(pl.col("y").first().over(pl.struct("x").implode())).item() == "two" - assert df.select(pl.col("y").first().over(pl.struct("x"))).item() == "two" + assert ( + df.select(pl.col("y").first().over(pl.struct("x").implode())).single() == "two" + ) + assert 
df.select(pl.col("y").first().over(pl.struct("x"))).single() == "two" def test_window_21692() -> None: diff --git a/py-polars/tests/unit/operations/unique/test_n_unique.py b/py-polars/tests/unit/operations/unique/test_n_unique.py index 978dc594e91c..4791ea52f2a3 100644 --- a/py-polars/tests/unit/operations/unique/test_n_unique.py +++ b/py-polars/tests/unit/operations/unique/test_n_unique.py @@ -76,4 +76,4 @@ def test_n_unique_array() -> None: ) assert df["arr"].dtype == pl.Array assert df.select(pl.col("arr")).n_unique() == 3 - assert df.select(pl.col("arr").n_unique()).item() == 3 + assert df.select(pl.col("arr").n_unique()).single() == 3 diff --git a/py-polars/tests/unit/operations/unique/test_unique.py b/py-polars/tests/unit/operations/unique/test_unique.py index 1135e9fec60c..33b6f2067341 100644 --- a/py-polars/tests/unit/operations/unique/test_unique.py +++ b/py-polars/tests/unit/operations/unique/test_unique.py @@ -218,7 +218,7 @@ def test_categorical_unique_19409() -> None: df = pl.DataFrame({"x": [str(n % 50) for n in range(127)]}).cast(pl.Categorical) uniq = df.unique() assert uniq.height == 50 - assert uniq.null_count().item() == 0 + assert uniq.null_count().single() == 0 assert set(uniq["x"]) == set(df["x"]) @@ -254,7 +254,7 @@ def test_unique_check_order_20480() -> None: .sort("key", "number") .unique(subset="key", keep="first") .collect()["number"] - .item() + .single() == 1 ) diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 556bb04ef12d..da26b1580f9e 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -1021,11 +1021,11 @@ def test_mode() -> None: df = pl.DataFrame([s]) assert df.select([pl.col("a").mode()])["a"].to_list() == [1] assert ( - pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().item() + pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().single() == "bar" ) - assert pl.Series([1.0, 2.0, 3.0, 
2.0]).mode().item() == 2.0 - assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b" + assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().single() == 2.0 + assert pl.Series(["a", "b", "c", "b"]).mode().single() == "b" # sorted data assert set(pl.int_range(0, 3, eager=True).mode().to_list()) == {0, 1, 2} @@ -2054,8 +2054,8 @@ def test_upper_lower_bounds( dtype: PolarsDataType, upper: int | float, lower: int | float ) -> None: s = pl.Series("s", dtype=dtype) - assert s.lower_bound().item() == lower - assert s.upper_bound().item() == upper + assert s.lower_bound().single() == lower + assert s.upper_bound().single() == upper def test_numpy_series_arithmetic() -> None: diff --git a/py-polars/tests/unit/series/test_item.py b/py-polars/tests/unit/series/test_single.py similarity index 59% rename from py-polars/tests/unit/series/test_item.py rename to py-polars/tests/unit/series/test_single.py index 7d8be87ee946..5c306025ab7c 100644 --- a/py-polars/tests/unit/series/test_item.py +++ b/py-polars/tests/unit/series/test_single.py @@ -7,19 +7,27 @@ import polars as pl -def test_series_item() -> None: +def test_series_single() -> None: s = pl.Series("a", [1]) - assert s.item() == 1 + assert s.single() == 1 + with pytest.warns(DeprecationWarning): + assert s.item() == 1 -def test_series_item_empty() -> None: +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_series_single_empty() -> None: s = pl.Series("a", []) + with pytest.raises(ValueError): + s.single() with pytest.raises(ValueError): s.item() -def test_series_item_incorrect_shape() -> None: +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_series_single_incorrect_shape() -> None: s = pl.Series("a", [1, 2]) + with pytest.raises(ValueError): + s.single() with pytest.raises(ValueError): s.item() @@ -30,17 +38,20 @@ def s() -> pl.Series: @pytest.mark.parametrize(("index", "expected"), [(0, 1), (1, 2), (-1, 2), (-2, 1)]) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def 
test_series_item_with_index(index: int, expected: int, s: pl.Series) -> None: assert s.item(index) == expected @pytest.mark.parametrize("index", [-10, 10]) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_out_of_bounds(index: int, s: pl.Series) -> None: with pytest.raises(IndexError, match="out of bounds"): s.item(index) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_series_item_out_of_range_date() -> None: s = pl.Series([datetime.date(9999, 12, 31)]).dt.offset_by("1d") with pytest.raises(ValueError, match="out of range"): - s.item() + s.single() diff --git a/py-polars/tests/unit/sql/test_literals.py b/py-polars/tests/unit/sql/test_literals.py index ebf6834e0664..9fa6a65fdc30 100644 --- a/py-polars/tests/unit/sql/test_literals.py +++ b/py-polars/tests/unit/sql/test_literals.py @@ -106,7 +106,7 @@ def test_dollar_quoted_literals() -> None: assert df.to_dict(as_series=False) == {f"dq{n}": ["xyz"] for n in range(1, 5)} df = pl.sql("SELECT $$x$z$$ AS dq").collect() - assert df.item() == "x$z" + assert df.single() == "x$z" def test_fixed_intervals() -> None: diff --git a/py-polars/tests/unit/sql/test_strings.py b/py-polars/tests/unit/sql/test_strings.py index dcb9b983bc0b..4c1c561e3b61 100644 --- a/py-polars/tests/unit/sql/test_strings.py +++ b/py-polars/tests/unit/sql/test_strings.py @@ -272,7 +272,7 @@ def test_string_like_multiline() -> None: # exact match for s in (s1, s2, s3): - assert df.sql(f"SELECT txt FROM self WHERE txt LIKE '{s}'").item() == s + assert df.sql(f"SELECT txt FROM self WHERE txt LIKE '{s}'").single() == s @pytest.mark.parametrize("form", ["NFKC", "NFKD"]) diff --git a/py-polars/tests/unit/sql/test_structs.py b/py-polars/tests/unit/sql/test_structs.py index cbca9b7f8672..556b8a4d491d 100644 --- a/py-polars/tests/unit/sql/test_structs.py +++ b/py-polars/tests/unit/sql/test_structs.py @@ -151,7 +151,7 @@ def test_struct_field_operator_access(expr: str, expected: int | str) -> None: }, }, ) - assert 
df.sql(f"SELECT {expr} FROM self").item() == expected + assert df.sql(f"SELECT {expr} FROM self").single() == expected @pytest.mark.parametrize( diff --git a/py-polars/tests/unit/sql/test_table_operations.py b/py-polars/tests/unit/sql/test_table_operations.py index 7220a5809ea4..96b13cd27b5f 100644 --- a/py-polars/tests/unit/sql/test_table_operations.py +++ b/py-polars/tests/unit/sql/test_table_operations.py @@ -77,7 +77,7 @@ def test_explain_query(test_frame: pl.LazyFrame) -> None: ctx.execute("EXPLAIN SELECT * FROM frame") .select(pl.col("Logical Plan").str.join()) .collect() - .item() + .single() ) assert ( re.search( diff --git a/py-polars/tests/unit/streaming/test_streaming.py b/py-polars/tests/unit/streaming/test_streaming.py index 8dc7dc2bef4d..02e7c3864c26 100644 --- a/py-polars/tests/unit/streaming/test_streaming.py +++ b/py-polars/tests/unit/streaming/test_streaming.py @@ -379,6 +379,6 @@ def test_i128_sum_reduction() -> None: .lazy() .sum() .collect(engine="streaming") - .item() + .single() == 6 ) diff --git a/py-polars/tests/unit/streaming/test_streaming_group_by.py b/py-polars/tests/unit/streaming/test_streaming_group_by.py index f462ebed6b10..f79da04bbd8c 100644 --- a/py-polars/tests/unit/streaming/test_streaming_group_by.py +++ b/py-polars/tests/unit/streaming/test_streaming_group_by.py @@ -463,7 +463,7 @@ def test_streaming_group_by_binary_15116() -> None: def test_streaming_group_by_convert_15380(partition_limit: int) -> None: assert ( - pl.DataFrame({"a": [1] * partition_limit}).group_by(b="a").len()["len"].item() + pl.DataFrame({"a": [1] * partition_limit}).group_by(b="a").len()["len"].single() == partition_limit ) diff --git a/py-polars/tests/unit/test_datatype_exprs.py b/py-polars/tests/unit/test_datatype_exprs.py index d27bb51cb2b3..5cbf46369ed4 100644 --- a/py-polars/tests/unit/test_datatype_exprs.py +++ b/py-polars/tests/unit/test_datatype_exprs.py @@ -133,7 +133,7 @@ def test_classification(selector: cs.Selector, fn_tags: list[str]) -> 
None: dtype_expr = dtype.to_dtype_expr() expr = dtype_expr.matches(selector) expected = dtype_tag in fn_tags - assert pl.select(expr).to_series().item() == expected + assert pl.select(expr).to_series().single() == expected @pytest.mark.parametrize( @@ -148,7 +148,7 @@ def test_int_signed_classification(selector: cs.Selector, fn_tag: str) -> None: dtype_expr = dtype.to_dtype_expr() expr = dtype_expr.matches(selector) expected = dtype_tag == fn_tag - assert pl.select(expr).to_series().item() == expected + assert pl.select(expr).to_series().single() == expected def test_array_width_classification() -> None: @@ -157,21 +157,21 @@ def test_array_width_classification() -> None: assert ( pl.select(arr_dtype.to_dtype_expr().matches(cs.array(width=2))) .to_series() - .item() + .single() ) assert not ( pl.select(arr_dtype.to_dtype_expr().matches(cs.array(width=3))) .to_series() - .item() + .single() ) def test_array_width() -> None: arr_dtype = pl.Array(pl.String, 2) - assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().item() == 2 + assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().single() == 2 arr_dtype = pl.Array(pl.String, 3) - assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().item() == 3 + assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().single() == 3 def test_array_shape() -> None: @@ -202,19 +202,19 @@ def test_inner_dtype() -> None: assert ( pl.select(arr_dtype.to_dtype_expr().inner_dtype() == pl.String) .to_series() - .item() + .single() ) assert ( pl.select(arr_dtype.to_dtype_expr().arr.inner_dtype() == pl.String) .to_series() - .item() + .single() ) with pytest.raises(pl.exceptions.SchemaError): arr_dtype.to_dtype_expr().list.inner_dtype().collect_dtype({}) list_dtype = pl.List(pl.String).to_dtype_expr() - assert pl.select(list_dtype.inner_dtype() == pl.String).to_series().item() - assert pl.select(list_dtype.list.inner_dtype() == pl.String).to_series().item() + assert 
pl.select(list_dtype.inner_dtype() == pl.String).to_series().single() + assert pl.select(list_dtype.list.inner_dtype() == pl.String).to_series().single() with pytest.raises(pl.exceptions.SchemaError): list_dtype.arr.inner_dtype().collect_dtype({}) @@ -222,7 +222,7 @@ def test_inner_dtype() -> None: def test_display() -> None: for dtype, _, dtype_str, _ in DTYPES: assert ( - pl.select(dtype.to_dtype_expr().display()).to_series().item() == dtype_str + pl.select(dtype.to_dtype_expr().display()).to_series().single() == dtype_str ) @@ -320,7 +320,7 @@ def test_default_value_int(dtype: pl.DataType, numeric_to_one: bool) -> None: dtype.to_dtype_expr().default_value(numeric_to_one=numeric_to_one) ).to_series() assert result.dtype == dtype - assert result.item() == (1 if numeric_to_one else 0) + assert result.single() == (1 if numeric_to_one else 0) @pytest.mark.parametrize("dtype", sorted(FLOAT_DTYPES, key=lambda v: str(v))) @@ -330,32 +330,32 @@ def test_default_value_float(dtype: pl.DataType, numeric_to_one: bool) -> None: dtype.to_dtype_expr().default_value(numeric_to_one=numeric_to_one) ).to_series() assert result.dtype == dtype - assert result.item() == (1.0 if numeric_to_one else 0.0) + assert result.single() == (1.0 if numeric_to_one else 0.0) def test_default_value_string() -> None: result = pl.select(pl.String().to_dtype_expr().default_value()).to_series() assert result.dtype == pl.String() - assert result.item() == "" + assert result.single() == "" def test_default_value_binary() -> None: result = pl.select(pl.String().to_dtype_expr().default_value()).to_series() assert result.dtype == pl.String() - assert result.item() == "" + assert result.single() == "" def test_default_value_decimal() -> None: result = pl.select(pl.Decimal(scale=2).to_dtype_expr().default_value()).to_series() assert result.dtype == pl.Decimal(scale=2) - assert result.item() == 0 + assert result.single() == 0 @pytest.mark.parametrize("dtype", sorted(TEMPORAL_DTYPES, key=lambda v: str(v))) def 
test_default_value_temporal(dtype: pl.DataType) -> None: result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.to_physical().item() == 0 + assert result.to_physical().single() == 0 @pytest.mark.parametrize("numeric_to_one", [False, True]) @@ -395,33 +395,33 @@ def test_default_value_object() -> None: dtype = pl.Object() result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.item() is None + assert result.single() is None def test_default_value_null() -> None: dtype = pl.Null() result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.item() is None + assert result.single() is None def test_default_value_categorical() -> None: dtype = pl.Categorical() result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.item() is None + assert result.single() is None def test_default_value_enum() -> None: dtype = pl.Enum([]) result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.item() is None + assert result.single() is None dtype = pl.Enum(["a", "b", "c"]) result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.item() == "a" + assert result.single() == "a" @pytest.mark.parametrize("n", [0, 1, 2, 5]) diff --git a/py-polars/tests/unit/test_datatypes.py b/py-polars/tests/unit/test_datatypes.py index e30cd107f0d1..b1bb3dc41abd 100644 --- a/py-polars/tests/unit/test_datatypes.py +++ b/py-polars/tests/unit/test_datatypes.py @@ -230,5 +230,5 @@ def test_max_min( lower: int | float | time, ) -> None: df = pl.select(min=dtype.min(), max=dtype.max()) - assert df.to_series(0).item() == lower - assert df.to_series(1).item() == upper + assert df.to_series(0).single() == lower + assert df.to_series(1).single() == upper diff --git 
a/py-polars/tests/unit/test_expansion.py b/py-polars/tests/unit/test_expansion.py index d4bb05440dab..464042ad94b5 100644 --- a/py-polars/tests/unit/test_expansion.py +++ b/py-polars/tests/unit/test_expansion.py @@ -80,8 +80,8 @@ def test_struct_name_resolving_15430() -> None: .collect(optimizations=pl.QueryOptFlags(projection_pushdown=True)) ) - assert a["b"].item() == "c" - assert b["b"].item() == "c" + assert a["b"].single() == "c" + assert b["b"].single() == "c" assert a.columns == ["b"] assert b.columns == ["b"] diff --git a/py-polars/tests/unit/test_format.py b/py-polars/tests/unit/test_format.py index e0247a847dc3..48e3a5b2ea89 100644 --- a/py-polars/tests/unit/test_format.py +++ b/py-polars/tests/unit/test_format.py @@ -509,4 +509,4 @@ def test_format_ascii_table_truncation(df: pl.DataFrame, expected: str) -> None: def test_format_21393() -> None: - assert pl.select(pl.format("{}", pl.lit(1, pl.Int128))).item() == "1" + assert pl.select(pl.format("{}", pl.lit(1, pl.Int128))).single() == "1" diff --git a/py-polars/tests/unit/test_polars_import.py b/py-polars/tests/unit/test_polars_import.py index 798257cb653a..60d3bf40f731 100644 --- a/py-polars/tests/unit/test_polars_import.py +++ b/py-polars/tests/unit/test_polars_import.py @@ -20,7 +20,7 @@ def _import_time_from_frame(tm: pl.DataFrame) -> int: return int( tm.filter(pl.col("import").str.strip_chars() == "polars") .select("cumulative_time") - .item() + .single() ) diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py index 84f7e3a72085..a9271a557fb0 100644 --- a/py-polars/tests/unit/test_projections.py +++ b/py-polars/tests/unit/test_projections.py @@ -483,7 +483,7 @@ def test_non_coalesce_join_projection_pushdown_16515( left.join(right, how=join_type, left_on="x", right_on="y", coalesce=False) .select("y") .collect() - .item() + .single() == 1 ) @@ -645,7 +645,7 @@ def test_select_len_20337() -> None: ) q = q.with_row_index("foo") - assert 
q.select(pl.len()).collect().item() == 3 + assert q.select(pl.len()).collect().single() == 3 def test_filter_count_projection_20902() -> None: diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py index a3f30bf5b567..5e944c230824 100644 --- a/py-polars/tests/unit/test_row_encoding.py +++ b/py-polars/tests/unit/test_row_encoding.py @@ -122,8 +122,8 @@ def test_bool(field: Any) -> None: @pytest.mark.parametrize("dtype", INTEGER_DTYPES) @pytest.mark.parametrize("field", FIELD_COMBS_ARGS) def test_int(dtype: pl.DataType, field: Any) -> None: - min = pl.select(x=dtype.min()).item() # type: ignore[attr-defined] - max = pl.select(x=dtype.max()).item() # type: ignore[attr-defined] + min = pl.select(x=dtype.min()).single() # type: ignore[attr-defined] + max = pl.select(x=dtype.max()).single() # type: ignore[attr-defined] roundtrip_series_re([], dtype, **field) roundtrip_series_re([0], dtype, **field) diff --git a/py-polars/tests/unit/test_scalar.py b/py-polars/tests/unit/test_scalar.py index 9e51375a8aa9..8ad043c6e1c6 100644 --- a/py-polars/tests/unit/test_scalar.py +++ b/py-polars/tests/unit/test_scalar.py @@ -62,7 +62,7 @@ def test_scalar_len_20046() -> None: ) .select(pl.len()) .collect() - .item() + .single() == 3 ) @@ -71,7 +71,7 @@ def test_scalar_len_20046() -> None: pl.col("a").alias("b"), ) - assert q.select(pl.len()).collect().item() == 3 + assert q.select(pl.len()).collect().single() == 3 def test_scalar_identification_function_expr_in_binary() -> None: diff --git a/py-polars/tests/unit/test_selectors.py b/py-polars/tests/unit/test_selectors.py index 070a808e9d9a..a2cfd80ae32f 100644 --- a/py-polars/tests/unit/test_selectors.py +++ b/py-polars/tests/unit/test_selectors.py @@ -993,7 +993,7 @@ def test_expand_more_than_one_22567() -> None: assert ( pl.select(x=1, y=2) .select(cs.by_name("x").as_expr() + cs.by_name("y").as_expr()) - .item() + .single() == 3 ) @@ -1028,25 +1028,25 @@ def test_selector_arith_dtypes_12850() 
-> None: assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() - cs.integer().as_expr()) - .item() + .single() == 1.0 ) assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() + cs.integer().as_expr()) - .item() + .single() == 3.0 ) assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() - cs.last().as_expr()) - .item() + .single() == 1.0 ) assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() - cs.by_name("b").as_expr()) - .item() + .single() == 1.0 ) From dba171beef24735885af1bef106511b4a4c95a7f Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 17:22:56 +0200 Subject: [PATCH 14/24] Revert "Replace .item() with .single() in tests" This reverts commit 47df54e04eeddfb55e8c0a5bab42ebe9d67f0edc. --- crates/polars-plan/src/dsl/format.rs | 2 +- py-polars/src/polars/dataframe/frame.py | 2 +- py-polars/src/polars/series/series.py | 18 +++--- .../unit/constructors/test_constructors.py | 2 +- py-polars/tests/unit/dataframe/test_df.py | 14 ++-- .../{test_single.py => test_item.py} | 24 ++----- .../tests/unit/datatypes/test_categorical.py | 4 +- .../tests/unit/datatypes/test_decimal.py | 2 +- py-polars/tests/unit/datatypes/test_enum.py | 8 +-- py-polars/tests/unit/datatypes/test_float.py | 12 ++-- .../tests/unit/datatypes/test_integer.py | 4 +- py-polars/tests/unit/datatypes/test_object.py | 2 +- .../tests/unit/datatypes/test_temporal.py | 64 +++++++++---------- py-polars/tests/unit/expr/test_exprs.py | 6 +- py-polars/tests/unit/expr/test_literal.py | 12 ++-- .../functions/as_datatype/test_datetime.py | 16 ++--- .../functions/as_datatype/test_duration.py | 5 +- .../functions/range/test_datetime_range.py | 2 +- .../unit/functions/test_business_day_count.py | 2 +- .../tests/unit/functions/test_functions.py | 10 ++- py-polars/tests/unit/functions/test_lit.py | 18 +++--- .../tests/unit/functions/test_when_then.py | 2 +- .../tests/unit/interchange/test_column.py | 4 +- 
.../tests/unit/io/database/test_write.py | 2 +- py-polars/tests/unit/io/test_hive.py | 6 +- py-polars/tests/unit/io/test_io_plugin.py | 2 +- .../tests/unit/io/test_lazy_count_star.py | 10 ++- py-polars/tests/unit/io/test_lazy_parquet.py | 2 +- py-polars/tests/unit/io/test_parquet.py | 4 +- py-polars/tests/unit/io/test_scan.py | 2 +- .../tests/unit/io/test_scan_row_deletion.py | 4 +- .../tests/unit/lazyframe/test_collect_all.py | 4 +- .../tests/unit/lazyframe/test_lazyframe.py | 10 +-- .../lazyframe/test_order_observability.py | 4 +- py-polars/tests/unit/ml/test_torch.py | 2 +- .../aggregation/test_aggregations.py | 4 +- .../operations/aggregation/test_vertical.py | 2 +- .../namespaces/string/test_concat.py | 12 ++-- .../namespaces/string/test_string.py | 14 ++-- .../temporal/test_add_business_days.py | 2 +- .../namespaces/temporal/test_datetime.py | 18 +++--- .../temporal/test_month_start_end.py | 8 +-- .../namespaces/temporal/test_round.py | 60 ++++++++--------- .../namespaces/temporal/test_to_datetime.py | 4 +- .../namespaces/temporal/test_truncate.py | 8 +-- .../unit/operations/namespaces/test_binary.py | 4 +- .../operations/namespaces/test_strptime.py | 58 ++++++++--------- .../unit/operations/rolling/test_rolling.py | 6 +- py-polars/tests/unit/operations/test_cast.py | 48 +++++++------- .../tests/unit/operations/test_comparison.py | 6 +- .../tests/unit/operations/test_fill_null.py | 4 +- .../tests/unit/operations/test_has_nulls.py | 2 +- .../tests/unit/operations/test_index_of.py | 6 +- py-polars/tests/unit/operations/test_is_in.py | 4 +- py-polars/tests/unit/operations/test_join.py | 10 +-- .../tests/unit/operations/test_statistics.py | 2 +- .../tests/unit/operations/test_transpose.py | 2 +- .../unit/operations/test_value_counts.py | 2 +- .../tests/unit/operations/test_window.py | 8 +-- .../unit/operations/unique/test_n_unique.py | 2 +- .../unit/operations/unique/test_unique.py | 4 +- .../series/{test_single.py => test_item.py} | 21 ++---- 
py-polars/tests/unit/series/test_series.py | 10 +-- py-polars/tests/unit/sql/test_literals.py | 2 +- py-polars/tests/unit/sql/test_strings.py | 2 +- py-polars/tests/unit/sql/test_structs.py | 2 +- .../tests/unit/sql/test_table_operations.py | 2 +- .../tests/unit/streaming/test_streaming.py | 2 +- .../unit/streaming/test_streaming_group_by.py | 2 +- py-polars/tests/unit/test_datatype_exprs.py | 44 ++++++------- py-polars/tests/unit/test_datatypes.py | 4 +- py-polars/tests/unit/test_expansion.py | 4 +- py-polars/tests/unit/test_format.py | 2 +- py-polars/tests/unit/test_polars_import.py | 2 +- py-polars/tests/unit/test_projections.py | 4 +- py-polars/tests/unit/test_row_encoding.py | 4 +- py-polars/tests/unit/test_scalar.py | 4 +- py-polars/tests/unit/test_selectors.py | 10 +-- 78 files changed, 334 insertions(+), 374 deletions(-) rename py-polars/tests/unit/dataframe/{test_single.py => test_item.py} (61%) rename py-polars/tests/unit/series/{test_single.py => test_item.py} (59%) diff --git a/crates/polars-plan/src/dsl/format.rs b/crates/polars-plan/src/dsl/format.rs index e080f8364a5d..18c89a3984ec 100644 --- a/crates/polars-plan/src/dsl/format.rs +++ b/crates/polars-plan/src/dsl/format.rs @@ -113,7 +113,7 @@ impl fmt::Debug for Expr { Mean(expr) => write!(f, "{expr:?}.mean()"), First(expr) => write!(f, "{expr:?}.first()"), Last(expr) => write!(f, "{expr:?}.last()"), - Single(expr) => write!(f, "{expr:?}.single()"), + Single(expr) => write!(f, "{expr:?}.item()"), Implode(expr) => write!(f, "{expr:?}.list()"), NUnique(expr) => write!(f, "{expr:?}.n_unique()"), Sum(expr) => write!(f, "{expr:?}.sum()"), diff --git a/py-polars/src/polars/dataframe/frame.py b/py-polars/src/polars/dataframe/frame.py index 8124c53750b3..f1d09f36d4d2 100644 --- a/py-polars/src/polars/dataframe/frame.py +++ b/py-polars/src/polars/dataframe/frame.py @@ -1648,7 +1648,7 @@ def collect_schema(self) -> Schema: return self.schema @deprecated( - "`DataFrame.item()` is deprecated; " + 
"`DataFrame.item` is deprecated; " "for unpacking a single value out of a dataframe as a scalar, use `DataFrame.single()`; " "for element retrieval, use `Dataframe[row, col]` instead; " ) diff --git a/py-polars/src/polars/series/series.py b/py-polars/src/polars/series/series.py index 4d6966b486f9..cac137da4303 100644 --- a/py-polars/src/polars/series/series.py +++ b/py-polars/src/polars/series/series.py @@ -1341,7 +1341,7 @@ def __deepcopy__(self, memo: None = None) -> Self: def __contains__(self, item: Any) -> bool: if item is None: return self.has_nulls() - return self.implode().list.contains(item).single() + return self.implode().list.contains(item).item() def __iter__(self) -> Generator[Any]: if self.dtype in (List, Array): @@ -1609,7 +1609,7 @@ def _repr_html_(self) -> str: return self.to_frame()._repr_html_(_from_series=True) @deprecated( - "`Series.item()` is deprecated; " + "`Series.item` is deprecated; " "for unpacking a single value out of a dataframe as a scalar, use `Series.single()`; " "for element retrieval, use `Series[index]` instead; " ) @@ -1623,7 +1623,7 @@ def item(self, index: int | None = None) -> Any: Examples -------- >>> s1 = pl.Series("a", [1]) - >>> s1.single() + >>> s1.item() 1 >>> s2 = pl.Series("a", [9, 8, 7]) >>> s2.cum_sum().item(-1) @@ -1649,7 +1649,7 @@ def single(self) -> Any: """ if len(self) != 1: msg = ( - "can only call '.single()' if the Series is of length 1," + "can only call '.item()' if the Series is of length 1," f" or an explicit index is provided (Series is of length {len(self)})" ) raise ValueError(msg) @@ -2204,7 +2204,7 @@ def nan_max(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_max() nan """ - return self.to_frame().select_seq(F.col(self.name).nan_max()).single() + return self.to_frame().select_seq(F.col(self.name).nan_max()).item() def nan_min(self) -> int | float | date | datetime | timedelta | str: """ @@ -2223,7 +2223,7 @@ def nan_min(self) -> int | float | date | datetime | timedelta | 
str: >>> s.nan_min() nan """ - return self.to_frame().select_seq(F.col(self.name).nan_min()).single() + return self.to_frame().select_seq(F.col(self.name).nan_min()).item() def std(self, ddof: int = 1) -> float | timedelta | None: """ @@ -2775,7 +2775,7 @@ def entropy(self, base: float = math.e, *, normalize: bool = True) -> float | No self.to_frame() .select_seq(F.col(self.name).entropy(base, normalize=normalize)) .to_series() - .single() + .item() ) @unstable() @@ -3736,7 +3736,7 @@ def search_sorted( elif _check_for_numpy(element) and isinstance(element, np.ndarray): return df.to_series() else: - return df.single() + return df.item() def unique(self, *, maintain_order: bool = False) -> Series: """ @@ -5136,7 +5136,7 @@ def index_of(self, element: IntoExpr) -> int | None: >>> s.index_of(55) is None True """ - return F.select(F.lit(self).index_of(element)).single() + return F.select(F.lit(self).index_of(element)).item() def clear(self, n: int = 0) -> Series: """ diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index ed420a8f2c27..cfcde1c18fef 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1273,7 +1273,7 @@ def test_from_rows_dtype() -> None: dc = _TestBazDC(d=datetime(2020, 2, 22), e=42.0, f="xyz") df = pl.DataFrame([[dc]], schema={"d": pl.Object}) assert df.schema == {"d": pl.Object} - assert df.single() == dc + assert df.item() == dc def test_from_dicts_schema() -> None: diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index ac81b5eb9896..121ab97c569d 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1061,7 +1061,7 @@ def test_is_nan_null_series() -> None: def test_len() -> None: df = pl.DataFrame({"nrs": [1, 2, 3]}) - assert cast("int", df.select(pl.col("nrs").len()).single()) == 3 + assert 
cast("int", df.select(pl.col("nrs").len()).item()) == 3 assert len(pl.DataFrame()) == 0 @@ -2543,10 +2543,10 @@ def test_fill_null_limits() -> None: def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None: res_expr = fruits_cars.select(pl.col("A").lower_bound()) - assert res_expr.single() == -9223372036854775808 + assert res_expr.item() == -9223372036854775808 res_expr = fruits_cars.select(pl.col("B").upper_bound()) - assert res_expr.single() == 9223372036854775807 + assert res_expr.item() == 9223372036854775807 with pytest.raises(ComputeError): fruits_cars.select(pl.col("fruits").upper_bound()) @@ -2921,7 +2921,7 @@ def test_init_vs_strptime_consistency( pl.Datetime("us", dtype_time_zone) ) assert result_init.dtype == pl.Datetime("us", expected_time_zone) - assert result_init.single() == expected_item + assert result_init.item() == expected_item assert_series_equal(result_init, result_strptime) @@ -2929,12 +2929,12 @@ def test_init_vs_strptime_consistency_converts() -> None: result = pl.Series( [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))], dtype=pl.Datetime("us", "US/Pacific"), - ).single() + ).item() assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific")) result = ( pl.Series(["2020-01-01 00:00-08:00"]) .str.strptime(pl.Datetime("us", "US/Pacific")) - .single() + .item() ) assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific")) @@ -3103,7 +3103,7 @@ def test_round() -> None: def test_dot() -> None: df = pl.DataFrame({"a": [1.8, 1.2, 3.0], "b": [3.2, 1, 2]}) - assert df.select(pl.col("a").dot(pl.col("b"))).single() == 12.96 + assert df.select(pl.col("a").dot(pl.col("b"))).item() == 12.96 def test_unstack() -> None: diff --git a/py-polars/tests/unit/dataframe/test_single.py b/py-polars/tests/unit/dataframe/test_item.py similarity index 61% rename from py-polars/tests/unit/dataframe/test_single.py rename to py-polars/tests/unit/dataframe/test_item.py index 3986fb2a5326..12f9d87c913f 100644 --- 
a/py-polars/tests/unit/dataframe/test_single.py +++ b/py-polars/tests/unit/dataframe/test_item.py @@ -5,36 +5,25 @@ import polars as pl -def test_df_single() -> None: +def test_df_item() -> None: df = pl.DataFrame({"a": [1]}) - assert df.single() == 1 - with pytest.warns(DeprecationWarning): - assert df.item() == 1 + assert df.item() == 1 -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_df_single_empty() -> None: +def test_df_item_empty() -> None: df = pl.DataFrame() - with pytest.raises(ValueError, match=r".* frame has shape \(0, 0\)"): - df.single() with pytest.raises(ValueError, match=r".* frame has shape \(0, 0\)"): df.item() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_df_single_incorrect_shape_rows() -> None: +def test_df_item_incorrect_shape_rows() -> None: df = pl.DataFrame({"a": [1, 2]}) - with pytest.raises(ValueError, match=r".* frame has shape \(2, 1\)"): - df.single() with pytest.raises(ValueError, match=r".* frame has shape \(2, 1\)"): df.item() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_df_single_incorrect_shape_columns() -> None: +def test_df_item_incorrect_shape_columns() -> None: df = pl.DataFrame({"a": [1], "b": [2]}) - with pytest.raises(ValueError, match=r".* frame has shape \(1, 2\)"): - df.single() with pytest.raises(ValueError, match=r".* frame has shape \(1, 2\)"): df.item() @@ -53,14 +42,12 @@ def df() -> pl.DataFrame: (-2, "b", 5), ], ) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_with_indices( row: int, col: int | str, expected: int, df: pl.DataFrame ) -> None: assert df.item(row, col) == expected -@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_with_single_index(df: pl.DataFrame) -> None: with pytest.raises(ValueError): df.item(0) @@ -73,7 +60,6 @@ def test_df_item_with_single_index(df: pl.DataFrame) -> None: @pytest.mark.parametrize( ("row", "col"), [(0, 10), (10, 0), (10, 10), (-10, 0), (-10, 10)] ) 
-@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_out_of_bounds(row: int, col: int, df: pl.DataFrame) -> None: with pytest.raises(IndexError, match="out of bounds"): df.item(row, col) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 8e58dfd4450c..444a7f85339a 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -466,7 +466,7 @@ def test_categorical_asof_join_by_arg() -> None: def test_categorical_list_get_item() -> None: - out = pl.Series([["a"]]).cast(pl.List(pl.Categorical)).single() + out = pl.Series([["a"]]).cast(pl.List(pl.Categorical)).item() assert isinstance(out, pl.Series) assert out.dtype == pl.Categorical @@ -538,7 +538,7 @@ def test_fast_unique_flag_from_arrow() -> None: ).with_columns([pl.col("colB").cast(pl.Categorical)]) filtered = df.to_arrow().filter([True, False, True, True, False, True, True, True]) - assert pl.from_arrow(filtered).select(pl.col("colB").n_unique()).single() == 4 # type: ignore[union-attr] + assert pl.from_arrow(filtered).select(pl.col("colB").n_unique()).item() == 4 # type: ignore[union-attr] def test_construct_with_null() -> None: diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index ffcbb9c5d6f2..b963c67c9392 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -157,7 +157,7 @@ def test_decimal_cast_no_scale() -> None: def test_decimal_scale_precision_roundtrip(monkeypatch: Any) -> None: - assert pl.from_arrow(pl.Series("dec", [D("10.0")]).to_arrow()).single() == D("10.0") + assert pl.from_arrow(pl.Series("dec", [D("10.0")]).to_arrow()).item() == D("10.0") def test_string_to_decimal() -> None: diff --git a/py-polars/tests/unit/datatypes/test_enum.py b/py-polars/tests/unit/datatypes/test_enum.py index 2b2ed97b27f5..2722ff53fe2a 100644 --- 
a/py-polars/tests/unit/datatypes/test_enum.py +++ b/py-polars/tests/unit/datatypes/test_enum.py @@ -547,8 +547,8 @@ def test_enum_cse_eq() -> None: .collect() ) - assert out["dt1"].single() == "a" - assert out["dt2"].single() == "a" + assert out["dt1"].item() == "a" + assert out["dt2"].item() == "a" assert out["dt1"].dtype == pl.Enum(["a", "b"]) assert out["dt2"].dtype == pl.Enum(["a", "c"]) assert out["dt1"].dtype != out["dt2"].dtype @@ -566,8 +566,8 @@ def test_category_comparison_subset() -> None: .collect() ) - assert out["dt1"].single() == "a" - assert out["dt2"].single() == "a" + assert out["dt1"].item() == "a" + assert out["dt2"].item() == "a" assert out["dt1"].dtype == pl.Enum(["a"]) assert out["dt2"].dtype == pl.Enum(["a", "b"]) assert out["dt1"].dtype != out["dt2"].dtype diff --git a/py-polars/tests/unit/datatypes/test_float.py b/py-polars/tests/unit/datatypes/test_float.py index c7f3bee88061..0ab3ca1584a2 100644 --- a/py-polars/tests/unit/datatypes/test_float.py +++ b/py-polars/tests/unit/datatypes/test_float.py @@ -14,12 +14,8 @@ def test_nan_in_group_by_agg() -> None: } ) - assert ( - df.group_by("bar", "key").agg(pl.col("value").max())["value"].single() == 18.78 - ) - assert ( - df.group_by("bar", "key").agg(pl.col("value").min())["value"].single() == 18.58 - ) + assert df.group_by("bar", "key").agg(pl.col("value").max())["value"].item() == 18.78 + assert df.group_by("bar", "key").agg(pl.col("value").min())["value"].item() == 18.58 def test_nan_aggregations() -> None: @@ -146,8 +142,8 @@ def test_hash() -> None: ).hash() # check them against each other since hash is not stable - assert s[0] == s[1] # hash(-0.0) == hash(0.0) - assert s[2] == s[3] # hash(float('-nan')) == hash(float('nan')) + assert s.item(0) == s.item(1) # hash(-0.0) == hash(0.0) + assert s.item(2) == s.item(3) # hash(float('-nan')) == hash(float('nan')) def test_group_by_float() -> None: diff --git a/py-polars/tests/unit/datatypes/test_integer.py 
b/py-polars/tests/unit/datatypes/test_integer.py index ad0a9446b3d4..ec649dd0a87e 100644 --- a/py-polars/tests/unit/datatypes/test_integer.py +++ b/py-polars/tests/unit/datatypes/test_integer.py @@ -27,5 +27,5 @@ def test_int_negate_operation() -> None: def test_compare_zero_with_uint64_16798() -> None: df = pl.Series("a", [(1 << 63), 0], dtype=pl.UInt64).to_frame() - assert df.select(pl.col("a") >= 0)[0, 0] - assert df.select(pl.col("a") == 0)[0, 0] is False + assert df.select(pl.col("a") >= 0).item(0, 0) + assert df.select(pl.col("a") == 0).item(0, 0) is False diff --git a/py-polars/tests/unit/datatypes/test_object.py b/py-polars/tests/unit/datatypes/test_object.py index 7e2ca5c1c61c..9adb71bcaa65 100644 --- a/py-polars/tests/unit/datatypes/test_object.py +++ b/py-polars/tests/unit/datatypes/test_object.py @@ -243,7 +243,7 @@ def test_object_null_slice() -> None: @pytest.mark.may_fail_cloud # reason: Object type not supported def test_object_sort_scalar_19925() -> None: a = object() - assert pl.DataFrame({"a": [0], "obj": [a]}).sort("a")["obj"].single() == a + assert pl.DataFrame({"a": [0], "obj": [a]}).sort("a")["obj"].item() == a def test_object_estimated_size() -> None: diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 8568c5b86b77..d57e62aec95b 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -1312,7 +1312,7 @@ def test_replace_time_zone_from_to( time_unit: TimeUnit, ) -> None: ts = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime(time_unit)) - result = ts.dt.replace_time_zone(from_tz).dt.replace_time_zone(to_tz).single() + result = ts.dt.replace_time_zone(from_tz).dt.replace_time_zone(to_tz).item() expected = datetime(2020, 1, 1, 0, 0, tzinfo=tzinfo) assert result == expected @@ -1321,7 +1321,7 @@ def test_strptime_with_tz() -> None: result = ( pl.Series(["2020-01-01 03:00:00"]) .str.strptime(pl.Datetime("us", 
"Africa/Monrovia")) - .single() + .item() ) assert result == datetime(2020, 1, 1, 3, tzinfo=ZoneInfo("Africa/Monrovia")) @@ -1388,11 +1388,11 @@ def test_convert_time_zone_lazy_schema() -> None: def test_convert_time_zone_on_tz_naive() -> None: ts = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime) - result = ts.dt.convert_time_zone("Asia/Kathmandu").single() + result = ts.dt.convert_time_zone("Asia/Kathmandu").item() expected = datetime(2020, 1, 1, 5, 45, tzinfo=ZoneInfo("Asia/Kathmandu")) assert result == expected result = ( - ts.dt.replace_time_zone("UTC").dt.convert_time_zone("Asia/Kathmandu").single() + ts.dt.replace_time_zone("UTC").dt.convert_time_zone("Asia/Kathmandu").item() ) assert result == expected @@ -1495,7 +1495,7 @@ def test_replace_time_zone_ambiguous_with_ambiguous( ambiguous: Ambiguous, expected: datetime ) -> None: ts = pl.Series(["2018-10-28 02:30:00"]).str.strptime(pl.Datetime) - result = ts.dt.replace_time_zone("Europe/Brussels", ambiguous=ambiguous).single() + result = ts.dt.replace_time_zone("Europe/Brussels", ambiguous=ambiguous).item() assert result == expected @@ -1693,7 +1693,7 @@ def test_single_ambiguous_null() -> None: pl.col("ts").dt.replace_time_zone( "Europe/London", ambiguous=pl.col("ambiguous") ) - )["ts"].single() + )["ts"].item() assert result is None @@ -1702,7 +1702,7 @@ def test_unlocalize() -> None: tz_aware = tz_naive.dt.replace_time_zone("UTC").dt.convert_time_zone( "Europe/Brussels" ) - result = tz_aware.dt.replace_time_zone(None).single() + result = tz_aware.dt.replace_time_zone(None).item() assert result == datetime(2020, 1, 1, 4) @@ -1847,7 +1847,7 @@ def test_tz_aware_with_timezone_directive( ) -> None: tz_naive = pl.Series(["2020-01-01 03:00:00"]).str.strptime(pl.Datetime) tz_aware = tz_naive.dt.replace_time_zone(time_zone) - result = tz_aware.dt.to_string(directive).single() + result = tz_aware.dt.to_string(directive).item() assert result == expected @@ -2192,7 +2192,7 @@ def test_truncate_non_existent_14957() 
-> None: def test_cast_time_to_duration() -> None: assert pl.Series([time(hour=0, minute=0, second=2)]).cast( pl.Duration - ).single() == timedelta(seconds=2) + ).item() == timedelta(seconds=2) def test_tz_aware_day_weekday() -> None: @@ -2293,21 +2293,21 @@ def test_infer_iso8601_datetime(iso8601_format_datetime: str) -> None: .replace("%9f", "123456789") ) parsed = pl.Series([time_string]).str.strptime(pl.Datetime("ns")) - assert parsed.dt.year().single() == 2134 - assert parsed.dt.month().single() == 12 - assert parsed.dt.day().single() == 13 + assert parsed.dt.year().item() == 2134 + assert parsed.dt.month().item() == 12 + assert parsed.dt.day().item() == 13 if "%H" in iso8601_format_datetime: - assert parsed.dt.hour().single() == 1 + assert parsed.dt.hour().item() == 1 if "%M" in iso8601_format_datetime: - assert parsed.dt.minute().single() == 12 + assert parsed.dt.minute().item() == 12 if "%S" in iso8601_format_datetime: - assert parsed.dt.second().single() == 34 + assert parsed.dt.second().item() == 34 if "%9f" in iso8601_format_datetime: - assert parsed.dt.nanosecond().single() == 123456789 + assert parsed.dt.nanosecond().item() == 123456789 if "%6f" in iso8601_format_datetime: - assert parsed.dt.nanosecond().single() == 123456000 + assert parsed.dt.nanosecond().item() == 123456000 if "%3f" in iso8601_format_datetime: - assert parsed.dt.nanosecond().single() == 123000000 + assert parsed.dt.nanosecond().item() == 123000000 def test_infer_iso8601_tz_aware_datetime(iso8601_tz_aware_format_datetime: str) -> None: @@ -2325,21 +2325,21 @@ def test_infer_iso8601_tz_aware_datetime(iso8601_tz_aware_format_datetime: str) .replace("%#z", "+01:00") ) parsed = pl.Series([time_string]).str.strptime(pl.Datetime("ns")) - assert parsed.dt.year().single() == 2134 - assert parsed.dt.month().single() == 12 - assert parsed.dt.day().single() == 13 + assert parsed.dt.year().item() == 2134 + assert parsed.dt.month().item() == 12 + assert parsed.dt.day().item() == 13 if "%H" in 
iso8601_tz_aware_format_datetime: - assert parsed.dt.hour().single() == 1 + assert parsed.dt.hour().item() == 1 if "%M" in iso8601_tz_aware_format_datetime: - assert parsed.dt.minute().single() == 12 + assert parsed.dt.minute().item() == 12 if "%S" in iso8601_tz_aware_format_datetime: - assert parsed.dt.second().single() == 34 + assert parsed.dt.second().item() == 34 if "%9f" in iso8601_tz_aware_format_datetime: - assert parsed.dt.nanosecond().single() == 123456789 + assert parsed.dt.nanosecond().item() == 123456789 if "%6f" in iso8601_tz_aware_format_datetime: - assert parsed.dt.nanosecond().single() == 123456000 + assert parsed.dt.nanosecond().item() == 123456000 if "%3f" in iso8601_tz_aware_format_datetime: - assert parsed.dt.nanosecond().single() == 123000000 + assert parsed.dt.nanosecond().item() == 123000000 assert parsed.dtype == pl.Datetime("ns", "UTC") @@ -2351,9 +2351,9 @@ def test_infer_iso8601_date(iso8601_format_date: str) -> None: .replace("%d", "13") ) parsed = pl.Series([time_string]).str.strptime(pl.Date) - assert parsed.dt.year().single() == 2134 - assert parsed.dt.month().single() == 12 - assert parsed.dt.day().single() == 13 + assert parsed.dt.year().item() == 2134 + assert parsed.dt.month().item() == 12 + assert parsed.dt.day().item() == 13 def test_year_null_backed_by_out_of_range_15313() -> None: @@ -2438,7 +2438,7 @@ def test_weekday_vs_stdlib_datetime( pl.Series([value], dtype=pl.Datetime(time_unit)) .dt.replace_time_zone(time_zone, non_existent="null", ambiguous="null") .dt.weekday() - .single() + .item() ) if result is not None: expected = value.isoweekday() @@ -2449,7 +2449,7 @@ def test_weekday_vs_stdlib_datetime( value=st.dates(), ) def test_weekday_vs_stdlib_date(value: date) -> None: - result = pl.Series([value]).dt.weekday().single() + result = pl.Series([value]).dt.weekday().item() expected = value.isoweekday() assert result == expected diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py 
index 69a38a46c8b6..ee91d8aeae42 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -101,7 +101,7 @@ def test_len_expr() -> None: out = df.select(pl.len()) assert out.shape == (1, 1) - assert cast(int, out.single()) == 5 + assert cast(int, out.item()) == 5 out = df.group_by("b", maintain_order=True).agg(pl.len()) assert out["b"].to_list() == ["a", "b"] @@ -518,11 +518,11 @@ def lit_series(value: Any, dtype: PolarsDataType | None) -> pl.Series: def test_lit_empty_tu() -> None: td = timedelta(1) - assert pl.select(pl.lit(td, dtype=pl.Duration)).single() == td + assert pl.select(pl.lit(td, dtype=pl.Duration)).item() == td assert pl.select(pl.lit(td, dtype=pl.Duration)).dtypes[0].time_unit == "us" # type: ignore[attr-defined] t = datetime(2023, 1, 1) - assert pl.select(pl.lit(t, dtype=pl.Datetime)).single() == t + assert pl.select(pl.lit(t, dtype=pl.Datetime)).item() == t assert pl.select(pl.lit(t, dtype=pl.Datetime)).dtypes[0].time_unit == "us" # type: ignore[attr-defined] diff --git a/py-polars/tests/unit/expr/test_literal.py b/py-polars/tests/unit/expr/test_literal.py index 5192e2bed24a..3e6f5e59b4d8 100644 --- a/py-polars/tests/unit/expr/test_literal.py +++ b/py-polars/tests/unit/expr/test_literal.py @@ -34,10 +34,10 @@ def test_literal_scalar_list_18686() -> None: def test_literal_integer_20807() -> None: for i in range(100): value = 2**i - assert pl.select(pl.lit(value)).single() == value - assert pl.select(pl.lit(-value)).single() == -value - assert pl.select(pl.lit(value, dtype=pl.Int128)).single() == value - assert pl.select(pl.lit(-value, dtype=pl.Int128)).single() == -value + assert pl.select(pl.lit(value)).item() == value + assert pl.select(pl.lit(-value)).item() == -value + assert pl.select(pl.lit(value, dtype=pl.Int128)).item() == value + assert pl.select(pl.lit(-value, dtype=pl.Int128)).item() == -value @pytest.mark.parametrize( @@ -58,7 +58,7 @@ def test_literal_datetime_timezone(tz: Any, lit_dtype: 
pl.DataType | None) -> No assert_frame_equal(df1, df2) assert df1.schema["dt"] == expected_dtype - assert df1.single() == value + assert df1.item() == value @pytest.mark.parametrize( @@ -107,7 +107,7 @@ def test_literal_datetime_timezone_utc_offset( for df in (df1, df2): assert df.schema["dt"] == expected_dtype - assert df.single() == expected_item + assert df.item() == expected_item def test_literal_datetime_timezone_utc_error() -> None: diff --git a/py-polars/tests/unit/functions/as_datatype/test_datetime.py b/py-polars/tests/unit/functions/as_datatype/test_datetime.py index cfffc993c653..688ecc2ec7ce 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_datetime.py +++ b/py-polars/tests/unit/functions/as_datatype/test_datetime.py @@ -83,19 +83,19 @@ def test_datetime_invalid_time_component(components: list[int]) -> None: def test_datetime_time_unit(time_unit: TimeUnit) -> None: result = pl.datetime(2022, 1, 2, time_unit=time_unit) - assert pl.select(result.dt.year()).single() == 2022 - assert pl.select(result.dt.month()).single() == 1 - assert pl.select(result.dt.day()).single() == 2 + assert pl.select(result.dt.year()).item() == 2022 + assert pl.select(result.dt.month()).item() == 1 + assert pl.select(result.dt.day()).item() == 2 @pytest.mark.parametrize("time_zone", [None, "Europe/Amsterdam", "UTC"]) def test_datetime_time_zone(time_zone: str | None) -> None: result = pl.datetime(2022, 1, 2, 10, time_zone=time_zone) - assert pl.select(result.dt.year()).single() == 2022 - assert pl.select(result.dt.month()).single() == 1 - assert pl.select(result.dt.day()).single() == 2 - assert pl.select(result.dt.hour()).single() == 10 + assert pl.select(result.dt.year()).item() == 2022 + assert pl.select(result.dt.month()).item() == 1 + assert pl.select(result.dt.day()).item() == 2 + assert pl.select(result.dt.hour()).item() == 10 def test_datetime_ambiguous_time_zone() -> None: @@ -110,7 +110,7 @@ def test_datetime_ambiguous_time_zone_earliest() -> None: 2018, 10, 
28, 2, 30, time_zone="Europe/Brussels", ambiguous="earliest" ) - result = pl.select(expr).single() + result = pl.select(expr).item() expected = datetime(2018, 10, 28, 2, 30, tzinfo=ZoneInfo("Europe/Brussels")) assert result == expected diff --git a/py-polars/tests/unit/functions/as_datatype/test_duration.py b/py-polars/tests/unit/functions/as_datatype/test_duration.py index bd58e7e349d4..3f6f99770142 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_duration.py +++ b/py-polars/tests/unit/functions/as_datatype/test_duration.py @@ -39,11 +39,10 @@ def test_duration_time_units(time_unit: TimeUnit, expected: timedelta) -> None: ) ) assert result.collect_schema()["duration"] == pl.Duration(time_unit) - assert result.collect()["duration"].single() == expected + assert result.collect()["duration"].item() == expected if time_unit == "ns": assert ( - result.collect()["duration"].dt.total_nanoseconds().single() - == 86523004005006 + result.collect()["duration"].dt.total_nanoseconds().item() == 86523004005006 ) diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 668373a99f7e..d052900b1e45 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -602,7 +602,7 @@ def test_datetime_range_fast_slow_paths( unit: str, start: datetime, ) -> None: - end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).single() + end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).item() result_slow = pl.datetime_range( start, end, diff --git a/py-polars/tests/unit/functions/test_business_day_count.py b/py-polars/tests/unit/functions/test_business_day_count.py index 32531ec636b5..1a33a69c81af 100644 --- a/py-polars/tests/unit/functions/test_business_day_count.py +++ b/py-polars/tests/unit/functions/test_business_day_count.py @@ -161,7 +161,7 @@ def test_against_np_busday_count( "start", "end", 
week_mask=week_mask, holidays=holidays ) )["n"] - .single() + .item() ) expected = np.busday_count(start, end, weekmask=week_mask, holidays=holidays) if start > end and parse_version(np.__version__) < (1, 25): diff --git a/py-polars/tests/unit/functions/test_functions.py b/py-polars/tests/unit/functions/test_functions.py index 7c786b4d53a0..c4bd4c87f4e1 100644 --- a/py-polars/tests/unit/functions/test_functions.py +++ b/py-polars/tests/unit/functions/test_functions.py @@ -237,7 +237,7 @@ def test_cov() -> None: # expect same result from both approaches for idx, (r1, r2) in enumerate(zip(res1, res2)): expected_value = -645.8333333333 if idx == 0 else -1291.6666666666 - assert pytest.approx(expected_value) == r1.single() + assert pytest.approx(expected_value) == r1.item() assert_series_equal(r1, r2) @@ -260,7 +260,7 @@ def test_corr() -> None: # expect same result from both approaches for idx, (r1, r2) in enumerate(zip(res1, res2)): - assert pytest.approx(-0.412199756 if idx == 0 else -0.5) == r1.single() + assert pytest.approx(-0.412199756 if idx == 0 else -0.5) == r1.item() assert_series_equal(r1, r2) @@ -284,12 +284,10 @@ def test_null_handling_correlation() -> None: df1 = pl.DataFrame({"a": [None, 1, 2], "b": [None, 2, 1]}) df2 = pl.DataFrame({"a": [np.nan, 1, 2], "b": [np.nan, 2, 1]}) - assert np.isclose(df1.select(pl.corr("a", "b", method="spearman")).single(), -1.0) + assert np.isclose(df1.select(pl.corr("a", "b", method="spearman")).item(), -1.0) assert ( str( - df2.select( - pl.corr("a", "b", method="spearman", propagate_nans=True) - ).single() + df2.select(pl.corr("a", "b", method="spearman", propagate_nans=True)).item() ) == "nan" ) diff --git a/py-polars/tests/unit/functions/test_lit.py b/py-polars/tests/unit/functions/test_lit.py index 1aa9140f5f04..ad3df8b9e422 100644 --- a/py-polars/tests/unit/functions/test_lit.py +++ b/py-polars/tests/unit/functions/test_lit.py @@ -145,9 +145,9 @@ class State(*EnumBase): # type: ignore[misc] pl.lit(value), 
pl.lit(value.value), # type: ignore[attr-defined] ): - assert pl.select(lit_value).single() == expected - assert df.filter(state=value).single() == expected - assert df.filter(state=lit_value).single() == expected + assert pl.select(lit_value).item() == expected + assert df.filter(state=value).item() == expected + assert df.filter(state=lit_value).item() == expected assert df.filter(pl.col("state") == State.QLD).is_empty() assert df.filter(pl.col("state") != State.QLD).height == 2 @@ -174,11 +174,11 @@ class Number(*EnumBase): # type: ignore[misc] result = pl.lit(value) assert pl.select(result).dtypes[0] == pl.Int32 - assert pl.select(result).single() == 1 + assert pl.select(result).item() == 1 result = pl.lit(value, dtype=pl.Int8) assert pl.select(result).dtypes[0] == pl.Int8 - assert pl.select(result).single() == 1 + assert pl.select(result).item() == 1 @given(value=datetimes("ns")) @@ -215,7 +215,7 @@ def test_lit_decimal() -> None: expr = pl.lit(value) df = pl.select(expr) - result = df.single() + result = df.item() assert df.dtypes[0] == pl.Decimal(None, 1) assert result == value @@ -226,7 +226,7 @@ def test_lit_string_float() -> None: expr = pl.lit(value, dtype=pl.Utf8) df = pl.select(expr) - result = df.single() + result = df.item() assert df.dtypes[0] == pl.String assert result == str(value) @@ -236,11 +236,11 @@ def test_lit_string_float() -> None: @given(s=series(min_size=1, max_size=1, allow_null=False, allowed_dtypes=pl.Decimal)) def test_lit_decimal_parametric(s: pl.Series) -> None: scale = s.dtype.scale # type: ignore[attr-defined] - value = s.single() + value = s.item() expr = pl.lit(value) df = pl.select(expr) - result = df.single() + result = df.item() assert df.dtypes[0] == pl.Decimal(None, scale) assert result == value diff --git a/py-polars/tests/unit/functions/test_when_then.py b/py-polars/tests/unit/functions/test_when_then.py index dcf4d60c9458..43e646d09b0c 100644 --- a/py-polars/tests/unit/functions/test_when_then.py +++ 
b/py-polars/tests/unit/functions/test_when_then.py @@ -328,7 +328,7 @@ def test_single_element_broadcast( expected = df.select("x").head( df.select( pl.max_horizontal(mask_expr.len(), truthy_expr.len(), falsy_expr.len()) - ).single() + ).item() ) assert_frame_equal(result, expected) diff --git a/py-polars/tests/unit/interchange/test_column.py b/py-polars/tests/unit/interchange/test_column.py index 4b9aade0a126..abe592fe3e83 100644 --- a/py-polars/tests/unit/interchange/test_column.py +++ b/py-polars/tests/unit/interchange/test_column.py @@ -251,8 +251,8 @@ def test_get_buffers_chunked_bitmask() -> None: col = PolarsColumn(s_chunked) chunks = list(col.get_chunks()) - assert chunks[0].get_buffers()["data"][0]._data.single() is True - assert chunks[1].get_buffers()["data"][0]._data.single() is False + assert chunks[0].get_buffers()["data"][0]._data.item() is True + assert chunks[1].get_buffers()["data"][0]._data.item() is False def test_get_buffers_string_zero_copy_fails() -> None: diff --git a/py-polars/tests/unit/io/database/test_write.py b/py-polars/tests/unit/io/database/test_write.py index 77130772eeec..8cd5a4ee54f9 100644 --- a/py-polars/tests/unit/io/database/test_write.py +++ b/py-polars/tests/unit/io/database/test_write.py @@ -301,7 +301,7 @@ def test_write_database_sa_rollback(tmp_path: str, pass_connection: bool) -> Non with Session(engine) as session: count = pl.read_database( query=f"select count(*) from {table_name}", connection=session - )[0, 0] + ).item(0, 0) assert isinstance(count, int) assert count == 0 diff --git a/py-polars/tests/unit/io/test_hive.py b/py-polars/tests/unit/io/test_hive.py index 2e345616fb36..90c98c68eb38 100644 --- a/py-polars/tests/unit/io/test_hive.py +++ b/py-polars/tests/unit/io/test_hive.py @@ -349,8 +349,8 @@ def test_hive_partition_directory_scan( ] # fmt: skip for df in dfs: - a = df[0, "a"] - b = df[0, "b"] + a = df.item(0, "a") + b = df.item(0, "b") path = tmp_path / f"a={a}/b={b}/data.bin" 
path.parent.mkdir(exist_ok=True, parents=True) write_func(df.drop("a", "b"), path) @@ -777,7 +777,7 @@ def test_hive_partition_filter_null_23005(tmp_path: Path) -> None: pl.any_horizontal(pl.col("date1", "date2").is_null()) & pl.col("path").str.contains("__HIVE_DEFAULT_PARTITION__") ).sum() - ).single() + ).item() == 2 ) diff --git a/py-polars/tests/unit/io/test_io_plugin.py b/py-polars/tests/unit/io/test_io_plugin.py index 45b917b8b682..e67ed18d9a8b 100644 --- a/py-polars/tests/unit/io/test_io_plugin.py +++ b/py-polars/tests/unit/io/test_io_plugin.py @@ -180,7 +180,7 @@ def _source( # check the expression directly dt_val, column_cast = pushed_predicate.meta.pop() # Extract the datetime value from the expression - assert pl.DataFrame({}).select(dt_val).single() == cutoff + assert pl.DataFrame({}).select(dt_val).item() == cutoff column = column_cast.meta.pop()[0] assert column.meta == pl.col("timestamp") diff --git a/py-polars/tests/unit/io/test_lazy_count_star.py b/py-polars/tests/unit/io/test_lazy_count_star.py index 28b77c660bcb..42e672c090e1 100644 --- a/py-polars/tests/unit/io/test_lazy_count_star.py +++ b/py-polars/tests/unit/io/test_lazy_count_star.py @@ -43,7 +43,7 @@ def assert_fast_count( assert project_logs == {"project: 0"} assert result.schema == {expected_name: pl.get_index_type()} - assert result.single() == expected_count + assert result.item() == expected_count # Test effect of the environment variable monkeypatch.setenv("POLARS_FAST_FILE_COUNT_DISPATCH", "0") @@ -114,10 +114,8 @@ def test_count_csv_no_newline_on_last_22564() -> None: assert pl.scan_csv(data).collect().height == 3 assert pl.scan_csv(data, comment_prefix="#").collect().height == 3 - assert pl.scan_csv(data).select(pl.len()).collect().single() == 3 - assert ( - pl.scan_csv(data, comment_prefix="#").select(pl.len()).collect().single() == 3 - ) + assert pl.scan_csv(data).select(pl.len()).collect().item() == 3 + assert pl.scan_csv(data, 
comment_prefix="#").select(pl.len()).collect().item() == 3 @pytest.mark.write_disk @@ -231,7 +229,7 @@ def test_count_projection_pd( project_logs = set(re.findall(r"project: \d+", capture)) assert project_logs == {"project: 0"} - assert result.single() == 3 + assert result.item() == 3 def test_csv_scan_skip_lines_len_22889( diff --git a/py-polars/tests/unit/io/test_lazy_parquet.py b/py-polars/tests/unit/io/test_lazy_parquet.py index aee3efb42ca1..4dbee96024a8 100644 --- a/py-polars/tests/unit/io/test_lazy_parquet.py +++ b/py-polars/tests/unit/io/test_lazy_parquet.py @@ -66,7 +66,7 @@ def test_row_index(foods_parquet_path: Path) -> None: def test_row_index_len_16543(foods_parquet_path: Path) -> None: q = pl.scan_parquet(foods_parquet_path).with_row_index() - assert q.select(pl.all()).select(pl.len()).collect().single() == 27 + assert q.select(pl.all()).select(pl.len()).collect().item() == 27 @pytest.mark.write_disk diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 326686191760..8670c5356020 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -573,7 +573,7 @@ def test_parquet_nano_second_schema() -> None: df = pd.DataFrame({"Time": [value]}) df.to_parquet(f) f.seek(0) - assert pl.read_parquet(f).single() == value + assert pl.read_parquet(f).item() == value def test_nested_struct_read_12610() -> None: @@ -2733,7 +2733,7 @@ def test_boolean_slice_pushdown_20314() -> None: s.to_frame().write_parquet(f) f.seek(0) - assert pl.scan_parquet(f).slice(2, 1).collect().single() + assert pl.scan_parquet(f).slice(2, 1).collect().item() def test_load_pred_pushdown_fsl_19241() -> None: diff --git a/py-polars/tests/unit/io/test_scan.py b/py-polars/tests/unit/io/test_scan.py index 494437d5dcd3..88ab7f9efdc6 100644 --- a/py-polars/tests/unit/io/test_scan.py +++ b/py-polars/tests/unit/io/test_scan.py @@ -918,7 +918,7 @@ def test_scan_csv_bytesio_memory_usage( pl.scan_csv(f) 
.filter(pl.col("mydata") == 999_999) .collect(engine="streaming" if streaming else "in-memory") - .single() + .item() == 999_999 ) # assert memory_usage.get_peak() - starting_memory < 1_000_000 diff --git a/py-polars/tests/unit/io/test_scan_row_deletion.py b/py-polars/tests/unit/io/test_scan_row_deletion.py index 7ed92d056726..60e3a333ac3e 100644 --- a/py-polars/tests/unit/io/test_scan_row_deletion.py +++ b/py-polars/tests/unit/io/test_scan_row_deletion.py @@ -97,7 +97,7 @@ def apply_row_index_offset(values: list[int]) -> list[int]: hive_partitioning=False, ).with_row_index(offset=row_index_offset) - assert q.select(pl.len()).collect().single() == 18 + assert q.select(pl.len()).collect().item() == 18 assert_frame_equal( q.collect(), @@ -389,7 +389,7 @@ def remove_data(path: Path) -> None: # Baseline: The metadata is readable but the row groups are not assert q.collect_schema() == {"physical_index": pl.UInt32} - assert q.select(pl.len()).collect().single() == 5 + assert q.select(pl.len()).collect().item() == 5 with pytest.raises(pl.exceptions.ComputeError, match="Invalid thrift"): q.collect() diff --git a/py-polars/tests/unit/lazyframe/test_collect_all.py b/py-polars/tests/unit/lazyframe/test_collect_all.py index 140fdf52d3cc..8922c96a0b2d 100644 --- a/py-polars/tests/unit/lazyframe/test_collect_all.py +++ b/py-polars/tests/unit/lazyframe/test_collect_all.py @@ -16,5 +16,5 @@ def test_collect_all(df: pl.DataFrame, optimizations: pl.QueryOptFlags) -> None: lf1 = df.lazy().select(pl.col("int").sum()) lf2 = df.lazy().select((pl.col("floats") * 2).sum()) out = pl.collect_all([lf1, lf2], optimizations=optimizations) - assert cast(int, out[0].single()) == 6 - assert cast(float, out[1].single()) == 12.0 + assert cast(int, out[0].item()) == 6 + assert cast(float, out[1].item()) == 12.0 diff --git a/py-polars/tests/unit/lazyframe/test_lazyframe.py b/py-polars/tests/unit/lazyframe/test_lazyframe.py index 7a957c1fdafd..e6fa0e82ff5e 100644 --- 
a/py-polars/tests/unit/lazyframe/test_lazyframe.py +++ b/py-polars/tests/unit/lazyframe/test_lazyframe.py @@ -207,7 +207,7 @@ def test_filter_multiple_predicates() -> None: "predicate": ["==", ">", ">="], }, ) - assert ldf.filter(predicate="==").select("description").collect().single() == "eq" + assert ldf.filter(predicate="==").select("description").collect().item() == "eq" @pytest.mark.parametrize( @@ -490,7 +490,7 @@ def test_is_finite_is_infinite() -> None: def test_len() -> None: ldf = pl.LazyFrame({"nrs": [1, 2, 3]}) - assert cast(int, ldf.select(pl.col("nrs").len()).collect().single()) == 3 + assert cast(int, ldf.select(pl.col("nrs").len()).collect().item()) == 3 @pytest.mark.parametrize("dtype", NUMERIC_DTYPES) @@ -576,7 +576,7 @@ def test_dot() -> None: ldf = pl.LazyFrame({"a": [1.8, 1.2, 3.0], "b": [3.2, 1, 2]}).select( pl.col("a").dot(pl.col("b")) ) - assert cast(float, ldf.collect().single()) == 12.96 + assert cast(float, ldf.collect().item()) == 12.96 def test_sort() -> None: @@ -870,7 +870,7 @@ def test_float_floor_divide() -> None: x = 10.4 step = 0.5 ldf = pl.LazyFrame({"x": [x]}) - ldf_res = ldf.with_columns(pl.col("x") // step).collect().single() + ldf_res = ldf.with_columns(pl.col("x") // step).collect().item() assert ldf_res == x // step @@ -1499,7 +1499,7 @@ def test_unique_length_multiple_columns() -> None: "b": [100, 100, 200, 100, 300], } ) - assert lf.unique().select(pl.len()).collect().single() == 4 + assert lf.unique().select(pl.len()).collect().item() == 4 def test_asof_cross_join() -> None: diff --git a/py-polars/tests/unit/lazyframe/test_order_observability.py b/py-polars/tests/unit/lazyframe/test_order_observability.py index 321d97c7b46e..a145be6c4ed4 100644 --- a/py-polars/tests/unit/lazyframe/test_order_observability.py +++ b/py-polars/tests/unit/lazyframe/test_order_observability.py @@ -541,7 +541,7 @@ def test_reverse_non_order_observe() -> None: plan = q.explain() assert "UNIQUE[maintain_order: false" in plan - assert 
q.collect().single() == 10 + assert q.collect().item() == 10 # Observing the order of the output of `reverse()` implicitly observes the # input to `reverse()`. @@ -554,7 +554,7 @@ def test_reverse_non_order_observe() -> None: plan = q.explain() assert "UNIQUE[maintain_order: true" in plan - assert q.collect().single() == 0 + assert q.collect().item() == 0 # Zipping `reverse()` must also consider the ordering of the input to # `reverse()`. diff --git a/py-polars/tests/unit/ml/test_torch.py b/py-polars/tests/unit/ml/test_torch.py index fa8656fbc696..735c0cb7a4be 100644 --- a/py-polars/tests/unit/ml/test_torch.py +++ b/py-polars/tests/unit/ml/test_torch.py @@ -62,7 +62,7 @@ def test_to_torch_tensor(df: pl.DataFrame) -> None: t2 = df.to_torch("tensor") assert list(t1.shape) == [4, 3] - assert (t1 == t2).all().single() is True + assert (t1 == t2).all().item() is True def test_to_torch_dict(df: pl.DataFrame) -> None: diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 4f820872974e..46bf85cb3520 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -282,8 +282,8 @@ def test_sum_empty_and_null_set() -> None: {"a": [None, None, None], "b": [1, 1, 1]}, schema={"a": pl.Float32, "b": pl.Int64}, ) - assert df.select(pl.sum("a")).single() == 0.0 - assert df.group_by("b").agg(pl.sum("a"))["a"].single() == 0.0 + assert df.select(pl.sum("a")).item() == 0.0 + assert df.group_by("b").agg(pl.sum("a"))["a"].item() == 0.0 def test_horizontal_sum_null_to_identity() -> None: diff --git a/py-polars/tests/unit/operations/aggregation/test_vertical.py b/py-polars/tests/unit/operations/aggregation/test_vertical.py index 073769ccda67..fc74fdf59b65 100644 --- a/py-polars/tests/unit/operations/aggregation/test_vertical.py +++ b/py-polars/tests/unit/operations/aggregation/test_vertical.py @@ -36,7 +36,7 
@@ def test_all_expr() -> None: def test_any_expr(fruits_cars: pl.DataFrame) -> None: - assert fruits_cars.with_columns(pl.col("A").cast(bool)).select(pl.any("A")).single() + assert fruits_cars.with_columns(pl.col("A").cast(bool)).select(pl.any("A")).item() @pytest.mark.parametrize("function", ["all", "any"]) diff --git a/py-polars/tests/unit/operations/namespaces/string/test_concat.py b/py-polars/tests/unit/operations/namespaces/string/test_concat.py index 5025e84ef5f6..13ee591cd3a8 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_concat.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_concat.py @@ -27,10 +27,10 @@ def test_str_join2() -> None: df = pl.DataFrame({"foo": [1, None, 2, None]}) out = df.select(pl.col("foo").str.join(ignore_nulls=False)) - assert out.single() is None + assert out.item() is None out = df.select(pl.col("foo").str.join()) - assert out.single() == "12" + assert out.item() == "12" def test_str_join_all_null() -> None: @@ -50,14 +50,14 @@ def test_str_join_empty_list() -> None: def test_str_join_empty_list2() -> None: s = pl.Series([], dtype=pl.String) df = pl.DataFrame({"foo": s}) - result = df.select(pl.col("foo").str.join()).single() + result = df.select(pl.col("foo").str.join()).item() expected = "" assert result == expected def test_str_join_empty_list_agg_context() -> None: df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String}) - result = df.group_by("i").agg(pl.col("v").drop_nulls().str.join())["v"].single() + result = df.group_by("i").agg(pl.col("v").drop_nulls().str.join())["v"].item() expected = "" assert result == expected @@ -65,9 +65,9 @@ def test_str_join_empty_list_agg_context() -> None: def test_str_join_datetime() -> None: df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]}) out = df.select(pl.col("d").str.join("|", ignore_nulls=True)) - assert out.single() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" + assert out.item() == 
"2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" out = df.select(pl.col("d").str.join("|", ignore_nulls=False)) - assert out.single() is None + assert out.item() is None def test_str_concat_deprecated() -> None: diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index 9084a0b4be33..a425792cd2ff 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -327,7 +327,7 @@ def test_str_find_invalid_regex() -> None: df.with_columns(pl.col("txt").str.find(rx_invalid, strict=True)) res = df.with_columns(pl.col("txt").str.find(rx_invalid, strict=False)) - assert res.single() is None + assert res.item() is None def test_str_find_escaped_chars() -> None: @@ -1058,7 +1058,7 @@ def test_contains_any( expected == df["text"] .str.contains_any(pattern, ascii_case_insensitive=case_insensitive) - .single() + .item() ) # expr assert ( @@ -1067,7 +1067,7 @@ def test_contains_any( pl.col("text").str.contains_any( pattern, ascii_case_insensitive=case_insensitive ) - )["text"].single() + )["text"].item() ) # frame filter assert int(expected) == len( @@ -1282,7 +1282,7 @@ def test_replace_many( expected == df["text"] .str.replace_many(pattern, replacement, ascii_case_insensitive=case_insensitive) - .single() + .item() ) # expr assert ( @@ -1293,7 +1293,7 @@ def test_replace_many( replacement, ascii_case_insensitive=case_insensitive, ) - ).single() + ).item() ) @@ -1339,7 +1339,7 @@ def test_replace_many_mapping( expected == df["text"] .str.replace_many(mapping, ascii_case_insensitive=case_insensitive) - .single() + .item() ) # expr assert ( @@ -1349,7 +1349,7 @@ def test_replace_many_mapping( mapping, ascii_case_insensitive=case_insensitive, ) - ).single() + ).item() ) diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py 
b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py index f1895bf0c67e..b7c77ef473bb 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py @@ -279,7 +279,7 @@ def test_against_np_busday_offset( n, week_mask=week_mask, holidays=holidays, roll=roll ) )["res"] - .single() + .item() ) expected = np.busday_offset( start, n, weekmask=week_mask, holidays=holidays, roll=roll diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index 924dbd8f983a..d752e9ed74d4 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -106,7 +106,7 @@ def test_dt_date_and_time( attribute: str, time_zone: None | str, expected: date | time ) -> None: ser = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone) - result = getattr(ser.dt, attribute)().single() + result = getattr(ser.dt, attribute)().item() assert result == expected @@ -121,7 +121,7 @@ def test_dt_replace_time_zone_none(time_zone: str | None, time_unit: TimeUnit) - result = ser.dt.replace_time_zone(None) expected = datetime(2022, 1, 1, 23) assert result.dtype == pl.Datetime(time_unit, None) - assert result.single() == expected + assert result.item() == expected def test_dt_datetime_deprecated() -> None: @@ -130,7 +130,7 @@ def test_dt_datetime_deprecated() -> None: result = s.dt.datetime() expected = datetime(2022, 1, 1, 23) assert result.dtype == pl.Datetime(time_zone=None) - assert result.single() == expected + assert result.item() == expected @pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"]) @@ -175,7 +175,7 @@ def test_local_time_before_epoch(time_unit: TimeUnit) -> None: ser = pl.Series([datetime(1969, 7, 21, 2, 56, 2, 
123000)]).dt.cast_time_unit( time_unit ) - result = ser.dt.time().single() + result = ser.dt.time().item() expected = time(2, 56, 2, 123000) assert result == expected @@ -1042,7 +1042,7 @@ def test_offset_by_expressions() -> None: def test_offset_by_saturating_8217_8474( duration: str, input_date: date, expected: date ) -> None: - result = pl.Series([input_date]).dt.offset_by(duration).single() + result = pl.Series([input_date]).dt.offset_by(duration).item() assert result == expected @@ -1463,7 +1463,7 @@ def test_literal_from_date( if dtype == pl.Datetime: tz = ZoneInfo(dtype.time_zone) if dtype.time_zone is not None else None # type: ignore[union-attr] value = datetime(value.year, value.month, value.day, tzinfo=tz) - assert out.single() == value + assert out.item() == value @pytest.mark.parametrize( @@ -1511,7 +1511,7 @@ def test_literal_from_datetime( value = value.replace(tzinfo=ZoneInfo(dtype.time_zone)) # type: ignore[union-attr] assert out.schema == OrderedDict({"literal": dtype}) - assert out.single() == value + assert out.item() == value @pytest.mark.parametrize( @@ -1526,7 +1526,7 @@ def test_literal_from_datetime( def test_literal_from_time(value: time) -> None: out = pl.select(pl.lit(value)) assert out.schema == OrderedDict({"literal": pl.Time}) - assert out.single() == value + assert out.item() == value @pytest.mark.parametrize( @@ -1550,4 +1550,4 @@ def test_literal_from_time(value: time) -> None: def test_literal_from_timedelta(value: time, dtype: pl.Duration | None) -> None: out = pl.select(pl.lit(value, dtype=dtype)) assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")}) - assert out.single() == value + assert out.item() == value diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py b/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py index 8b99d0e0e406..f25192106090 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py +++ 
b/py-polars/tests/unit/operations/namespaces/temporal/test_month_start_end.py @@ -39,7 +39,7 @@ def test_month_start_datetime( time_zone: str | None, ) -> None: ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit) - result = ser.dt.month_start().single() + result = ser.dt.month_start().item() assert result == expected.replace(tzinfo=tzinfo) @@ -52,7 +52,7 @@ def test_month_start_datetime( ) def test_month_start_date(dt: date, expected: date) -> None: ser = pl.Series([dt]) - result = ser.dt.month_start().single() + result = ser.dt.month_start().item() assert result == expected @@ -84,7 +84,7 @@ def test_month_end_datetime( time_zone: str | None, ) -> None: ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit) - result = ser.dt.month_end().single() + result = ser.dt.month_end().item() assert result == expected.replace(tzinfo=tzinfo) @@ -97,7 +97,7 @@ def test_month_end_datetime( ) def test_month_end_date(dt: date, expected: date) -> None: ser = pl.Series([dt]) - result = ser.dt.month_end().single() + result = ser.dt.month_end().item() assert result == expected diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py index 7d37dc655b16..5fefcdaf5893 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py @@ -169,9 +169,9 @@ def test_round_date() -> None: @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_round_datetime_simple(time_unit: TimeUnit) -> None: s = pl.Series([datetime(2020, 1, 2, 6)], dtype=pl.Datetime(time_unit)) - result = s.dt.round("1mo").single() + result = s.dt.round("1mo").item() assert result == datetime(2020, 1, 1) - result = s.dt.round("1d").single() + result = s.dt.round("1d").item() assert result == datetime(2020, 1, 2) @@ -197,14 +197,14 @@ def test_round_datetime_w_expression(time_unit: 
TimeUnit) -> None: def test_round_negative_towards_epoch_18239(time_unit: TimeUnit, expected: int) -> None: s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit)) s = s.dt.offset_by(f"-1{time_unit}") - result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").single() + result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item() assert result == expected result = ( s.dt.replace_time_zone("Europe/London") .dt.round(f"2{time_unit}") .dt.replace_time_zone(None) .dt.timestamp(time_unit="ns") - .single() + .item() ) assert result == expected @@ -222,14 +222,14 @@ def test_round_positive_away_from_epoch_18239( ) -> None: s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit)) s = s.dt.offset_by(f"1{time_unit}") - result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").single() + result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item() assert result == expected result = ( s.dt.replace_time_zone("Europe/London") .dt.round(f"2{time_unit}") .dt.replace_time_zone(None) .dt.timestamp(time_unit="ns") - .single() + .item() ) assert result == expected @@ -249,33 +249,33 @@ def test_round_unequal_length_22018(as_date: bool) -> None: def test_round_small() -> None: small = 1.234e-320 small_s = pl.Series([small]) - assert small_s.round().single() == 0.0 - assert small_s.round(320).single() == 1e-320 - assert small_s.round(321).single() == 1.2e-320 - assert small_s.round(322).single() == 1.23e-320 - assert small_s.round(323).single() == 1.234e-320 - assert small_s.round(324).single() == small - assert small_s.round(1000).single() == small - - assert small_s.round_sig_figs(1).single() == 1e-320 - assert small_s.round_sig_figs(2).single() == 1.2e-320 - assert small_s.round_sig_figs(3).single() == 1.23e-320 - assert small_s.round_sig_figs(4).single() == 1.234e-320 - assert small_s.round_sig_figs(5).single() == small - assert small_s.round_sig_figs(1000).single() == small + assert small_s.round().item() == 0.0 + 
assert small_s.round(320).item() == 1e-320 + assert small_s.round(321).item() == 1.2e-320 + assert small_s.round(322).item() == 1.23e-320 + assert small_s.round(323).item() == 1.234e-320 + assert small_s.round(324).item() == small + assert small_s.round(1000).item() == small + + assert small_s.round_sig_figs(1).item() == 1e-320 + assert small_s.round_sig_figs(2).item() == 1.2e-320 + assert small_s.round_sig_figs(3).item() == 1.23e-320 + assert small_s.round_sig_figs(4).item() == 1.234e-320 + assert small_s.round_sig_figs(5).item() == small + assert small_s.round_sig_figs(1000).item() == small def test_round_big() -> None: big = 1.234e308 max_err = big / 10**10 big_s = pl.Series([big]) - assert big_s.round().single() == big - assert big_s.round(1).single() == big - assert big_s.round(100).single() == big - - assert abs(big_s.round_sig_figs(1).single() - 1e308) <= max_err - assert abs(big_s.round_sig_figs(2).single() - 1.2e308) <= max_err - assert abs(big_s.round_sig_figs(3).single() - 1.23e308) <= max_err - assert abs(big_s.round_sig_figs(4).single() - 1.234e308) <= max_err - assert abs(big_s.round_sig_figs(4).single() - big) <= max_err - assert big_s.round_sig_figs(100).single() == big + assert big_s.round().item() == big + assert big_s.round(1).item() == big + assert big_s.round(100).item() == big + + assert abs(big_s.round_sig_figs(1).item() - 1e308) <= max_err + assert abs(big_s.round_sig_figs(2).item() - 1.2e308) <= max_err + assert abs(big_s.round_sig_figs(3).item() - 1.23e308) <= max_err + assert abs(big_s.round_sig_figs(4).item() - 1.234e308) <= max_err + assert abs(big_s.round_sig_figs(4).item() - big) <= max_err + assert big_s.round_sig_figs(100).item() == big diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py index b9f05ca4cfc3..14e6d6e060f0 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py +++ 
b/py-polars/tests/unit/operations/namespaces/temporal/test_to_datetime.py @@ -109,7 +109,7 @@ def test_to_datetime(datetimes: datetime, fmt: str) -> None: input = datetimes.strftime(fmt) expected = datetime.strptime(input, fmt) try: - result = pl.Series([input]).str.to_datetime(format=fmt).single() + result = pl.Series([input]).str.to_datetime(format=fmt).item() # If there's an exception, check that it's either: # - something which polars can't parse at all: missing day or month # - something on which polars intentionally raises @@ -196,7 +196,7 @@ def test_to_datetime_aware_values_aware_dtype() -> None: def test_to_datetime_two_digit_year_17213( inputs: str, format: str, expected: date ) -> None: - result = pl.Series([inputs]).str.to_date(format=format).single() + result = pl.Series([inputs]).str.to_date(format=format).item() assert result == expected diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py index 08a2cabad409..ba802fb7c295 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py @@ -24,7 +24,7 @@ n=st.integers(min_value=1, max_value=100), ) def test_truncate_monthly(value: date, n: int) -> None: - result = pl.Series([value]).dt.truncate(f"{n}mo").single() + result = pl.Series([value]).dt.truncate(f"{n}mo").item() # manual calculation total = (value.year - 1970) * 12 + value.month - 1 remainder = total % n @@ -79,9 +79,9 @@ def test_truncate_date() -> None: @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_truncate_datetime_simple(time_unit: TimeUnit) -> None: s = pl.Series([datetime(2020, 1, 2, 6)], dtype=pl.Datetime(time_unit)) - result = s.dt.truncate("1mo").single() + result = s.dt.truncate("1mo").item() assert result == datetime(2020, 1, 1) - result = s.dt.truncate("1d").single() + result = s.dt.truncate("1d").item() assert result == 
datetime(2020, 1, 2) @@ -157,7 +157,7 @@ def test_truncate_origin_22590( .dt.replace_time_zone(time_zone) .dt.truncate(f"{multiplier}{unit}") .dt.replace_time_zone(None) - .single() + .item() ) assert result == expected, result diff --git a/py-polars/tests/unit/operations/namespaces/test_binary.py b/py-polars/tests/unit/operations/namespaces/test_binary.py index 8de08ab2fb3a..9124fce068d5 100644 --- a/py-polars/tests/unit/operations/namespaces/test_binary.py +++ b/py-polars/tests/unit/operations/namespaces/test_binary.py @@ -167,8 +167,8 @@ def test_compare_decode_between_lazy_and_eager_6814(encoding: TransferEncoding) def test_binary_size(sz: int, unit: SizeUnit, expected: int | float) -> None: df = pl.DataFrame({"data": [b"\x00" * sz]}, schema={"data": pl.Binary}) for sz in ( - df.select(sz=pl.col("data").bin.size(unit)).single(), # expr - df["data"].bin.size(unit).single(), # series + df.select(sz=pl.col("data").bin.size(unit)).item(), # expr + df["data"].bin.size(unit).item(), # series ): assert sz == expected diff --git a/py-polars/tests/unit/operations/namespaces/test_strptime.py b/py-polars/tests/unit/operations/namespaces/test_strptime.py index 6a22c25ba103..051371cec8a5 100644 --- a/py-polars/tests/unit/operations/namespaces/test_strptime.py +++ b/py-polars/tests/unit/operations/namespaces/test_strptime.py @@ -38,12 +38,12 @@ def test_str_strptime() -> None: def test_date_parse_omit_day() -> None: df = pl.DataFrame({"month": ["2022-01"]}) - assert df.select(pl.col("month").str.to_date(format="%Y-%m")).single() == date( + assert df.select(pl.col("month").str.to_date(format="%Y-%m")).item() == date( 2022, 1, 1 ) assert df.select( pl.col("month").str.to_datetime(format="%Y-%m") - ).single() == datetime(2022, 1, 1) + ).item() == datetime(2022, 1, 1) def test_to_datetime_precision() -> None: @@ -280,7 +280,7 @@ def test_to_datetime_dates_datetimes() -> None: ], ) def test_to_datetime_patterns_single(time_string: str, expected: str) -> None: - result = 
pl.Series([time_string]).str.to_datetime().single() + result = pl.Series([time_string]).str.to_datetime().item() assert result == expected @@ -290,7 +290,7 @@ def test_infer_tz_aware_time_unit(time_unit: TimeUnit) -> None: time_unit=time_unit ) assert result.dtype == pl.Datetime(time_unit, "UTC") - assert result.single() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) + assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) @@ -299,14 +299,14 @@ def test_infer_tz_aware_with_utc(time_unit: TimeUnit) -> None: time_unit=time_unit ) assert result.dtype == pl.Datetime(time_unit, "UTC") - assert result.single() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) + assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc) def test_str_to_datetime_infer_tz_aware() -> None: result = ( pl.Series(["2020-01-02T04:00:00+02:00"]) .str.to_datetime(time_unit="us", time_zone="Europe/Vienna") - .single() + .item() ) assert result == datetime(2020, 1, 2, 3, tzinfo=ZoneInfo("Europe/Vienna")) @@ -412,13 +412,13 @@ def test_parse_negative_dates( s = pl.Series([ts]) result = s.str.to_datetime(format, time_unit="ms") # Python datetime.datetime doesn't support negative dates, so comparing - # with `result.single()` directly won't work. - assert result.dt.year().single() == exp_year - assert result.dt.month().single() == exp_month - assert result.dt.day().single() == exp_day - assert result.dt.hour().single() == exp_hour - assert result.dt.minute().single() == exp_minute - assert result.dt.second().single() == exp_second + # with `result.item()` directly won't work. 
+ assert result.dt.year().item() == exp_year + assert result.dt.month().item() == exp_month + assert result.dt.day().item() == exp_day + assert result.dt.hour().item() == exp_hour + assert result.dt.minute().item() == exp_minute + assert result.dt.second().item() == exp_second def test_short_formats() -> None: @@ -445,7 +445,7 @@ def test_strptime_abbrev_month( time_string: str, fmt: str, datatype: PolarsTemporalType, expected: date ) -> None: s = pl.Series([time_string]) - result = s.str.strptime(datatype, fmt).single() + result = s.str.strptime(datatype, fmt).item() assert result == expected @@ -538,7 +538,7 @@ def test_to_datetime_ambiguous_or_non_existent() -> None: ], ) def test_to_datetime_tz_aware_strptime(ts: str, fmt: str, expected: datetime) -> None: - result = pl.Series([ts]).str.to_datetime(fmt).single() + result = pl.Series([ts]).str.to_datetime(fmt).item() assert result == expected @@ -575,7 +575,7 @@ def test_crossing_dst_tz_aware(format: str) -> None: ) def test_strptime_subseconds_datetime(data: str, format: str, expected: time) -> None: s = pl.Series([data]) - result = s.str.to_datetime(format).single() + result = s.str.to_datetime(format).item() assert result == expected @@ -643,7 +643,7 @@ def test_strptime_incomplete_formats(string: str, fmt: str) -> None: ) def test_strptime_complete_formats(string: str, fmt: str, expected: datetime) -> None: # Similar to the above, but these formats are complete and should work - result = pl.Series([string]).str.to_datetime(fmt).single() + result = pl.Series([string]).str.to_datetime(fmt).item() assert result == expected @@ -676,8 +676,8 @@ def test_to_time_inferred(data: str, format: str, expected: time) -> None: def test_to_time_subseconds(data: str, format: str, expected: time) -> None: s = pl.Series([data]) for res in ( - s.str.to_time().single(), - s.str.to_time(format).single(), + s.str.to_time().item(), + s.str.to_time(format).item(), ): assert res == expected @@ -685,7 +685,7 @@ def 
test_to_time_subseconds(data: str, format: str, expected: time) -> None: def test_to_time_format_warning() -> None: s = pl.Series(["05:10:10.074000"]) with pytest.warns(ChronoFormatWarning, match=".%f"): - result = s.str.to_time("%H:%M:%S.%f").single() + result = s.str.to_time("%H:%M:%S.%f").item() assert result == time(5, 10, 10, 74) @@ -694,14 +694,14 @@ def test_to_datetime_ambiguous_earliest(exact: bool) -> None: result = ( pl.Series(["2020-10-25 01:00"]) .str.to_datetime(time_zone="Europe/London", ambiguous="earliest", exact=exact) - .single() + .item() ) expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")) assert result == expected result = ( pl.Series(["2020-10-25 01:00"]) .str.to_datetime(time_zone="Europe/London", ambiguous="latest", exact=exact) - .single() + .item() ) expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")) assert result == expected @@ -709,7 +709,7 @@ def test_to_datetime_ambiguous_earliest(exact: bool) -> None: pl.Series(["2020-10-25 01:00"]).str.to_datetime( time_zone="Europe/London", exact=exact, - ).single() + ).item() def test_to_datetime_naive_format_and_time_zone() -> None: @@ -731,7 +731,7 @@ def test_strptime_ambiguous_earliest(exact: bool) -> None: .str.strptime( pl.Datetime("us", "Europe/London"), ambiguous="earliest", exact=exact ) - .single() + .item() ) expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")) assert result == expected @@ -740,7 +740,7 @@ def test_strptime_ambiguous_earliest(exact: bool) -> None: .str.strptime( pl.Datetime("us", "Europe/London"), ambiguous="latest", exact=exact ) - .single() + .item() ) expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")) assert result == expected @@ -748,7 +748,7 @@ def test_strptime_ambiguous_earliest(exact: bool) -> None: pl.Series(["2020-10-25 01:00"]).str.strptime( pl.Datetime("us", "Europe/London"), exact=exact, - ).single() + ).item() @pytest.mark.parametrize("time_unit", 
["ms", "us", "ns"]) @@ -757,9 +757,7 @@ def test_to_datetime_out_of_range_13401(time_unit: TimeUnit) -> None: with pytest.raises(InvalidOperationError, match="conversion .* failed"): s.str.to_datetime("%Y-%B-%d %H:%M:%S", time_unit=time_unit) assert ( - s.str.to_datetime( - "%Y-%B-%d %H:%M:%S", strict=False, time_unit=time_unit - ).single() + s.str.to_datetime("%Y-%B-%d %H:%M:%S", strict=False, time_unit=time_unit).item() is None ) @@ -824,7 +822,7 @@ def test_strptime_empty_input_22214() -> None: ) def test_matching_strings_but_different_format_22495(value: str) -> None: s = pl.Series("my_strings", [value]) - result = s.str.to_date("%Y-%m-%d", strict=False).single() + result = s.str.to_date("%Y-%m-%d", strict=False).item() assert result is None diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index c39878a08c18..848323b36e4b 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -1100,10 +1100,10 @@ def test_rolling_median_2() -> None: # this can differ because simd sizes and non-associativity of floats. 
assert df.select( pl.col("x").rolling_median(window_size=10).sum() - ).single() == pytest.approx(5.139429061527812) + ).item() == pytest.approx(5.139429061527812) assert df.select( pl.col("x").rolling_median(window_size=100).sum() - ).single() == pytest.approx(26.60506093611384) + ).item() == pytest.approx(26.60506093611384) @pytest.mark.parametrize( @@ -1801,5 +1801,5 @@ def test_rolling_rank_method_random( ).all() ) .collect() - .single() + .item() ) diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py index e723d2b509f0..a354572bcdbd 100644 --- a/py-polars/tests/unit/operations/test_cast.py +++ b/py-polars/tests/unit/operations/test_cast.py @@ -180,7 +180,7 @@ def _cast_series( dtype_out: PolarsDataType, strict: bool, ) -> int | datetime | date | time | timedelta | None: - return pl.Series("a", [val], dtype=dtype_in).cast(dtype_out, strict=strict).single() # type: ignore[no-any-return] + return pl.Series("a", [val], dtype=dtype_in).cast(dtype_out, strict=strict).item() # type: ignore[no-any-return] def _cast_expr( @@ -193,7 +193,7 @@ def _cast_expr( pl.Series("a", [val], dtype=dtype_in) .to_frame() .select(pl.col("a").cast(dtype_out, strict=strict)) - .single() + .item() ) @@ -203,9 +203,7 @@ def _cast_lit( dtype_out: PolarsDataType, strict: bool, ) -> int | datetime | date | time | timedelta | None: - return ( # type: ignore[no-any-return] - pl.select(pl.lit(val, dtype=dtype_in).cast(dtype_out, strict=strict)).single() - ) + return pl.select(pl.lit(val, dtype=dtype_in).cast(dtype_out, strict=strict)).item() # type: ignore[no-any-return] @pytest.mark.parametrize( @@ -366,13 +364,13 @@ def test_strict_cast_temporal( args = [value, from_dtype, to_dtype, True] if should_succeed: out = _cast_series_t(*args) # type: ignore[arg-type] - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] - assert out.single() == 
expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype else: with pytest.raises(InvalidOperationError): @@ -438,23 +436,23 @@ def test_cast_temporal( args = [value, from_dtype, to_dtype, False] out = _cast_series_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.single() is None + assert out.item() is None else: - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.single() is None + assert out.item() is None else: - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.single() is None + assert out.item() is None else: - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype @@ -488,23 +486,23 @@ def test_cast_string( args = [value, from_dtype, to_dtype, False] out = _cast_series_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.single() is None + assert out.item() is None else: - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.single() is None + assert out.item() is None else: - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] if expected_value is None: - assert out.single() is None + assert out.item() is None else: - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype @@ -540,13 +538,13 @@ def 
test_strict_cast_string( args = [value, from_dtype, to_dtype, True] if should_succeed: out = _cast_series_t(*args) # type: ignore[arg-type] - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_expr_t(*args) # type: ignore[arg-type] - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype out = _cast_lit_t(*args) # type: ignore[arg-type] - assert out.single() == expected_value + assert out.item() == expected_value assert out.dtype == to_dtype else: with pytest.raises(InvalidOperationError): @@ -706,14 +704,14 @@ def test_all_null_cast_5826() -> None: df = pl.DataFrame(data=[pl.Series("a", [None], dtype=pl.String)]) out = df.with_columns(pl.col("a").cast(pl.Boolean)) assert out.dtypes == [pl.Boolean] - assert out.single() is None + assert out.item() is None @pytest.mark.parametrize("dtype", INTEGER_DTYPES) def test_bool_numeric_supertype(dtype: PolarsDataType) -> None: df = pl.DataFrame({"v": [1, 2, 3, 4, 5, 6]}) result = df.select((pl.col("v") < 3).sum().cast(dtype) / pl.len()) - assert result.single() - 0.3333333 <= 0.00001 + assert result.item() - 0.3333333 <= 0.00001 @pytest.mark.parametrize("dtype", [pl.String(), pl.String, str]) diff --git a/py-polars/tests/unit/operations/test_comparison.py b/py-polars/tests/unit/operations/test_comparison.py index 4371a3cccb33..2634971d3e15 100644 --- a/py-polars/tests/unit/operations/test_comparison.py +++ b/py-polars/tests/unit/operations/test_comparison.py @@ -140,7 +140,7 @@ def test_offset_handling_arg_where_7863() -> None: assert ( df_check.select((pl.lit(0).append(pl.col("a")).append(0)) != 0) .select(pl.col("literal").arg_true()) - .single() + .item() == 2 ) @@ -456,10 +456,10 @@ def test_schema_ne_missing_9256() -> None: def test_nested_binary_literal_super_type_12227() -> None: # The `.alias` is important here to trigger the bug. 
result = pl.select(x=1).select((pl.lit(0) + ((pl.col("x") > 0) * 0.1)).alias("x")) - assert result.single() == 0.1 + assert result.item() == 0.1 result = pl.select((pl.lit(0) + (pl.lit(0) == pl.lit(0)) * pl.lit(0.1)) + pl.lit(0)) - assert result.single() == 0.1 + assert result.item() == 0.1 def test_struct_broadcasting_comparison() -> None: diff --git a/py-polars/tests/unit/operations/test_fill_null.py b/py-polars/tests/unit/operations/test_fill_null.py index ff2a79d3e712..bc3099ef1a2a 100644 --- a/py-polars/tests/unit/operations/test_fill_null.py +++ b/py-polars/tests/unit/operations/test_fill_null.py @@ -44,7 +44,7 @@ def test_fill_null_non_lit() -> None: "d": pl.Series([None, 2], dtype=pl.Decimal(10, 2)), } ) - assert df.fill_null(0).select(pl.all().null_count()).transpose().sum().single() == 0 + assert df.fill_null(0).select(pl.all().null_count()).transpose().sum().item() == 0 def test_fill_null_f32_with_lit() -> None: @@ -62,7 +62,7 @@ def test_fill_null_lit_() -> None: } ) assert ( - df.fill_null(pl.lit(0)).select(pl.all().null_count()).transpose().sum().single() + df.fill_null(pl.lit(0)).select(pl.all().null_count()).transpose().sum().item() == 0 ) diff --git a/py-polars/tests/unit/operations/test_has_nulls.py b/py-polars/tests/unit/operations/test_has_nulls.py index 77152412b6d1..7a78c9a09ff8 100644 --- a/py-polars/tests/unit/operations/test_has_nulls.py +++ b/py-polars/tests/unit/operations/test_has_nulls.py @@ -13,7 +13,7 @@ def test_has_nulls_series_no_nulls(s: pl.Series) -> None: @given(df=dataframes(allow_null=False)) def test_has_nulls_expr_no_nulls(df: pl.DataFrame) -> None: result = df.select(pl.all().has_nulls()) - assert result.select(pl.any_horizontal(df.columns)).single() is False + assert result.select(pl.any_horizontal(df.columns)).item() is False @given( diff --git a/py-polars/tests/unit/operations/test_index_of.py b/py-polars/tests/unit/operations/test_index_of.py index 049548b4da8e..95baaac1882d 100644 --- 
a/py-polars/tests/unit/operations/test_index_of.py +++ b/py-polars/tests/unit/operations/test_index_of.py @@ -126,8 +126,8 @@ def test_integer(dtype: IntegerType) -> None: 3, None, 4, - pl.select(dtype_max).single(), - pl.select(dtype_min).single(), + pl.select(dtype_max).item(), + pl.select(dtype_min).item(), ] series = pl.Series(values, dtype=dtype) sorted_series_asc = series.sort(descending=False) @@ -136,7 +136,7 @@ def test_integer(dtype: IntegerType) -> None: [pl.Series([100, 7], dtype=dtype), series], rechunk=False ) - extra_values = [pl.select(v).single() for v in [dtype_max - 1, dtype_min + 1]] + extra_values = [pl.select(v).item() for v in [dtype_max - 1, dtype_min + 1]] for s in [series, sorted_series_asc, sorted_series_desc, chunked_series]: value: IntoExpr for value in values: diff --git a/py-polars/tests/unit/operations/test_is_in.py b/py-polars/tests/unit/operations/test_is_in.py index 47312a0497cd..71fb7886d069 100644 --- a/py-polars/tests/unit/operations/test_is_in.py +++ b/py-polars/tests/unit/operations/test_is_in.py @@ -160,7 +160,7 @@ def test_is_in_struct() -> None: def test_is_in_null_prop() -> None: - assert pl.Series([None], dtype=pl.Float32).is_in(pl.Series([42])).single() is None + assert pl.Series([None], dtype=pl.Float32).is_in(pl.Series([42])).item() is None assert pl.Series([{"a": None}, None], dtype=pl.Struct({"a": pl.Float32})).is_in( pl.Series([{"a": 42}], dtype=pl.Struct({"a": pl.Float32})) ).to_list() == [False, None] @@ -171,7 +171,7 @@ def test_is_in_null_prop() -> None: def test_is_in_9070() -> None: - assert not pl.Series([1]).is_in(pl.Series([1.99])).single() + assert not pl.Series([1]).is_in(pl.Series([1.99])).item() def test_is_in_float_list_10764() -> None: diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py index d23555a19cf4..eb157c42be61 100644 --- a/py-polars/tests/unit/operations/test_join.py +++ b/py-polars/tests/unit/operations/test_join.py @@ -222,7 +222,7 @@ 
def test_right_join_schema_maintained_22516() -> None: .collect() ) - assert lazy_join.single() == eager_join.single() + assert lazy_join.item() == eager_join.item() def test_join() -> None: @@ -1737,7 +1737,7 @@ def test_select_after_join_where_20831() -> None: pl.Series("d", [None, None, 7, 8, 8, 8]).to_frame(), ) - assert q.select(pl.len()).collect().single() == 6 + assert q.select(pl.len()).collect().item() == 6 q = ( left.join(right, how="cross") @@ -1750,7 +1750,7 @@ def test_select_after_join_where_20831() -> None: pl.Series("d", [None, None, 7, 8, 8, 8]).to_frame(), ) - assert q.select(pl.len()).collect().single() == 6 + assert q.select(pl.len()).collect().item() == 6 @pytest.mark.parametrize( @@ -1871,7 +1871,7 @@ def test_select_len_after_semi_anti_join_21343() -> None: q = lhs.join(rhs, on="a", how="anti").select(pl.len()) - assert q.collect().single() == 0 + assert q.collect().item() == 0 def test_multi_leftjoin_empty_right_21701() -> None: @@ -3662,7 +3662,7 @@ def test_join_rewrite_null_preserving_exprs( .select(expr_func(pl.first())) .select(pl.first().is_null() | ~pl.first()) .to_series() - .single() + .item() ) q = lhs.join(rhs, on="a", how="left", maintain_order="left_right").filter( diff --git a/py-polars/tests/unit/operations/test_statistics.py b/py-polars/tests/unit/operations/test_statistics.py index 8e4104b70594..3df6ad7a8e7d 100644 --- a/py-polars/tests/unit/operations/test_statistics.py +++ b/py-polars/tests/unit/operations/test_statistics.py @@ -67,7 +67,7 @@ def test_cov_corr_f32_type() -> None: def test_cov(fruits_cars: pl.DataFrame) -> None: ldf = fruits_cars.lazy() for cov_ab in (pl.cov(pl.col("A"), pl.col("B")), pl.cov("A", "B")): - assert cast(float, ldf.select(cov_ab).collect().single()) == -2.5 + assert cast(float, ldf.select(cov_ab).collect().item()) == -2.5 def test_std(fruits_cars: pl.DataFrame) -> None: diff --git a/py-polars/tests/unit/operations/test_transpose.py b/py-polars/tests/unit/operations/test_transpose.py index 
7cc0486ee08e..591cea081909 100644 --- a/py-polars/tests/unit/operations/test_transpose.py +++ b/py-polars/tests/unit/operations/test_transpose.py @@ -195,4 +195,4 @@ def test_transpose_multiple_chunks() -> None: def test_nested_struct_transpose_21923() -> None: df = pl.DataFrame({"x": [{"a": {"b": 1, "c": 2}}]}) - assert df.transpose().single() == df.single() + assert df.transpose().item() == df.item() diff --git a/py-polars/tests/unit/operations/test_value_counts.py b/py-polars/tests/unit/operations/test_value_counts.py index d306a459e2ea..7b1c2e25cf74 100644 --- a/py-polars/tests/unit/operations/test_value_counts.py +++ b/py-polars/tests/unit/operations/test_value_counts.py @@ -80,7 +80,7 @@ def test_value_counts_duplicate_name() -> None: df = pl.DataFrame({"a": [None, 1, None, 2, 3]}) result = df.select(pl.col("a").count()) - assert result.single() == 3 + assert result.item() == 3 result = df.group_by(1).agg(pl.col("a").count()) assert result.to_dict(as_series=False) == {"literal": [1], "a": [3]} diff --git a/py-polars/tests/unit/operations/test_window.py b/py-polars/tests/unit/operations/test_window.py index 654f0afc84a9..59b41fad5b88 100644 --- a/py-polars/tests/unit/operations/test_window.py +++ b/py-polars/tests/unit/operations/test_window.py @@ -510,7 +510,7 @@ def test_window_chunked_std_17102() -> None: df = pl.concat([c1, c2], rechunk=False) out = df.select(pl.col("B").std().over("A").alias("std")) - assert out.unique().single() == 0.7071067811865476 + assert out.unique().item() == 0.7071067811865476 def test_window_17308() -> None: @@ -548,10 +548,8 @@ def test_order_by_sorted_keys_18943() -> None: def test_nested_window_keys() -> None: df = pl.DataFrame({"x": 1, "y": "two"}) - assert ( - df.select(pl.col("y").first().over(pl.struct("x").implode())).single() == "two" - ) - assert df.select(pl.col("y").first().over(pl.struct("x"))).single() == "two" + assert df.select(pl.col("y").first().over(pl.struct("x").implode())).item() == "two" + assert 
df.select(pl.col("y").first().over(pl.struct("x"))).item() == "two" def test_window_21692() -> None: diff --git a/py-polars/tests/unit/operations/unique/test_n_unique.py b/py-polars/tests/unit/operations/unique/test_n_unique.py index 4791ea52f2a3..978dc594e91c 100644 --- a/py-polars/tests/unit/operations/unique/test_n_unique.py +++ b/py-polars/tests/unit/operations/unique/test_n_unique.py @@ -76,4 +76,4 @@ def test_n_unique_array() -> None: ) assert df["arr"].dtype == pl.Array assert df.select(pl.col("arr")).n_unique() == 3 - assert df.select(pl.col("arr").n_unique()).single() == 3 + assert df.select(pl.col("arr").n_unique()).item() == 3 diff --git a/py-polars/tests/unit/operations/unique/test_unique.py b/py-polars/tests/unit/operations/unique/test_unique.py index 33b6f2067341..1135e9fec60c 100644 --- a/py-polars/tests/unit/operations/unique/test_unique.py +++ b/py-polars/tests/unit/operations/unique/test_unique.py @@ -218,7 +218,7 @@ def test_categorical_unique_19409() -> None: df = pl.DataFrame({"x": [str(n % 50) for n in range(127)]}).cast(pl.Categorical) uniq = df.unique() assert uniq.height == 50 - assert uniq.null_count().single() == 0 + assert uniq.null_count().item() == 0 assert set(uniq["x"]) == set(df["x"]) @@ -254,7 +254,7 @@ def test_unique_check_order_20480() -> None: .sort("key", "number") .unique(subset="key", keep="first") .collect()["number"] - .single() + .item() == 1 ) diff --git a/py-polars/tests/unit/series/test_single.py b/py-polars/tests/unit/series/test_item.py similarity index 59% rename from py-polars/tests/unit/series/test_single.py rename to py-polars/tests/unit/series/test_item.py index 5c306025ab7c..7d8be87ee946 100644 --- a/py-polars/tests/unit/series/test_single.py +++ b/py-polars/tests/unit/series/test_item.py @@ -7,27 +7,19 @@ import polars as pl -def test_series_single() -> None: +def test_series_item() -> None: s = pl.Series("a", [1]) - assert s.single() == 1 - with pytest.warns(DeprecationWarning): - assert s.item() == 1 + 
assert s.item() == 1 -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_series_single_empty() -> None: +def test_series_item_empty() -> None: s = pl.Series("a", []) - with pytest.raises(ValueError): - s.single() with pytest.raises(ValueError): s.item() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_series_single_incorrect_shape() -> None: +def test_series_item_incorrect_shape() -> None: s = pl.Series("a", [1, 2]) - with pytest.raises(ValueError): - s.single() with pytest.raises(ValueError): s.item() @@ -38,20 +30,17 @@ def s() -> pl.Series: @pytest.mark.parametrize(("index", "expected"), [(0, 1), (1, 2), (-1, 2), (-2, 1)]) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_series_item_with_index(index: int, expected: int, s: pl.Series) -> None: assert s.item(index) == expected @pytest.mark.parametrize("index", [-10, 10]) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_df_item_out_of_bounds(index: int, s: pl.Series) -> None: with pytest.raises(IndexError, match="out of bounds"): s.item(index) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_series_item_out_of_range_date() -> None: s = pl.Series([datetime.date(9999, 12, 31)]).dt.offset_by("1d") with pytest.raises(ValueError, match="out of range"): - s.single() + s.item() diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index da26b1580f9e..556bb04ef12d 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -1021,11 +1021,11 @@ def test_mode() -> None: df = pl.DataFrame([s]) assert df.select([pl.col("a").mode()])["a"].to_list() == [1] assert ( - pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().single() + pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().item() == "bar" ) - assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().single() == 2.0 - assert pl.Series(["a", "b", "c", 
"b"]).mode().single() == "b" + assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.0 + assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b" # sorted data assert set(pl.int_range(0, 3, eager=True).mode().to_list()) == {0, 1, 2} @@ -2054,8 +2054,8 @@ def test_upper_lower_bounds( dtype: PolarsDataType, upper: int | float, lower: int | float ) -> None: s = pl.Series("s", dtype=dtype) - assert s.lower_bound().single() == lower - assert s.upper_bound().single() == upper + assert s.lower_bound().item() == lower + assert s.upper_bound().item() == upper def test_numpy_series_arithmetic() -> None: diff --git a/py-polars/tests/unit/sql/test_literals.py b/py-polars/tests/unit/sql/test_literals.py index 9fa6a65fdc30..ebf6834e0664 100644 --- a/py-polars/tests/unit/sql/test_literals.py +++ b/py-polars/tests/unit/sql/test_literals.py @@ -106,7 +106,7 @@ def test_dollar_quoted_literals() -> None: assert df.to_dict(as_series=False) == {f"dq{n}": ["xyz"] for n in range(1, 5)} df = pl.sql("SELECT $$x$z$$ AS dq").collect() - assert df.single() == "x$z" + assert df.item() == "x$z" def test_fixed_intervals() -> None: diff --git a/py-polars/tests/unit/sql/test_strings.py b/py-polars/tests/unit/sql/test_strings.py index 4c1c561e3b61..dcb9b983bc0b 100644 --- a/py-polars/tests/unit/sql/test_strings.py +++ b/py-polars/tests/unit/sql/test_strings.py @@ -272,7 +272,7 @@ def test_string_like_multiline() -> None: # exact match for s in (s1, s2, s3): - assert df.sql(f"SELECT txt FROM self WHERE txt LIKE '{s}'").single() == s + assert df.sql(f"SELECT txt FROM self WHERE txt LIKE '{s}'").item() == s @pytest.mark.parametrize("form", ["NFKC", "NFKD"]) diff --git a/py-polars/tests/unit/sql/test_structs.py b/py-polars/tests/unit/sql/test_structs.py index 556b8a4d491d..cbca9b7f8672 100644 --- a/py-polars/tests/unit/sql/test_structs.py +++ b/py-polars/tests/unit/sql/test_structs.py @@ -151,7 +151,7 @@ def test_struct_field_operator_access(expr: str, expected: int | str) -> None: }, }, ) - 
assert df.sql(f"SELECT {expr} FROM self").single() == expected + assert df.sql(f"SELECT {expr} FROM self").item() == expected @pytest.mark.parametrize( diff --git a/py-polars/tests/unit/sql/test_table_operations.py b/py-polars/tests/unit/sql/test_table_operations.py index 96b13cd27b5f..7220a5809ea4 100644 --- a/py-polars/tests/unit/sql/test_table_operations.py +++ b/py-polars/tests/unit/sql/test_table_operations.py @@ -77,7 +77,7 @@ def test_explain_query(test_frame: pl.LazyFrame) -> None: ctx.execute("EXPLAIN SELECT * FROM frame") .select(pl.col("Logical Plan").str.join()) .collect() - .single() + .item() ) assert ( re.search( diff --git a/py-polars/tests/unit/streaming/test_streaming.py b/py-polars/tests/unit/streaming/test_streaming.py index 02e7c3864c26..8dc7dc2bef4d 100644 --- a/py-polars/tests/unit/streaming/test_streaming.py +++ b/py-polars/tests/unit/streaming/test_streaming.py @@ -379,6 +379,6 @@ def test_i128_sum_reduction() -> None: .lazy() .sum() .collect(engine="streaming") - .single() + .item() == 6 ) diff --git a/py-polars/tests/unit/streaming/test_streaming_group_by.py b/py-polars/tests/unit/streaming/test_streaming_group_by.py index f79da04bbd8c..f462ebed6b10 100644 --- a/py-polars/tests/unit/streaming/test_streaming_group_by.py +++ b/py-polars/tests/unit/streaming/test_streaming_group_by.py @@ -463,7 +463,7 @@ def test_streaming_group_by_binary_15116() -> None: def test_streaming_group_by_convert_15380(partition_limit: int) -> None: assert ( - pl.DataFrame({"a": [1] * partition_limit}).group_by(b="a").len()["len"].single() + pl.DataFrame({"a": [1] * partition_limit}).group_by(b="a").len()["len"].item() == partition_limit ) diff --git a/py-polars/tests/unit/test_datatype_exprs.py b/py-polars/tests/unit/test_datatype_exprs.py index 5cbf46369ed4..d27bb51cb2b3 100644 --- a/py-polars/tests/unit/test_datatype_exprs.py +++ b/py-polars/tests/unit/test_datatype_exprs.py @@ -133,7 +133,7 @@ def test_classification(selector: cs.Selector, fn_tags: list[str]) 
-> None: dtype_expr = dtype.to_dtype_expr() expr = dtype_expr.matches(selector) expected = dtype_tag in fn_tags - assert pl.select(expr).to_series().single() == expected + assert pl.select(expr).to_series().item() == expected @pytest.mark.parametrize( @@ -148,7 +148,7 @@ def test_int_signed_classification(selector: cs.Selector, fn_tag: str) -> None: dtype_expr = dtype.to_dtype_expr() expr = dtype_expr.matches(selector) expected = dtype_tag == fn_tag - assert pl.select(expr).to_series().single() == expected + assert pl.select(expr).to_series().item() == expected def test_array_width_classification() -> None: @@ -157,21 +157,21 @@ def test_array_width_classification() -> None: assert ( pl.select(arr_dtype.to_dtype_expr().matches(cs.array(width=2))) .to_series() - .single() + .item() ) assert not ( pl.select(arr_dtype.to_dtype_expr().matches(cs.array(width=3))) .to_series() - .single() + .item() ) def test_array_width() -> None: arr_dtype = pl.Array(pl.String, 2) - assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().single() == 2 + assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().item() == 2 arr_dtype = pl.Array(pl.String, 3) - assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().single() == 3 + assert pl.select(arr_dtype.to_dtype_expr().arr.width()).to_series().item() == 3 def test_array_shape() -> None: @@ -202,19 +202,19 @@ def test_inner_dtype() -> None: assert ( pl.select(arr_dtype.to_dtype_expr().inner_dtype() == pl.String) .to_series() - .single() + .item() ) assert ( pl.select(arr_dtype.to_dtype_expr().arr.inner_dtype() == pl.String) .to_series() - .single() + .item() ) with pytest.raises(pl.exceptions.SchemaError): arr_dtype.to_dtype_expr().list.inner_dtype().collect_dtype({}) list_dtype = pl.List(pl.String).to_dtype_expr() - assert pl.select(list_dtype.inner_dtype() == pl.String).to_series().single() - assert pl.select(list_dtype.list.inner_dtype() == pl.String).to_series().single() + assert 
pl.select(list_dtype.inner_dtype() == pl.String).to_series().item() + assert pl.select(list_dtype.list.inner_dtype() == pl.String).to_series().item() with pytest.raises(pl.exceptions.SchemaError): list_dtype.arr.inner_dtype().collect_dtype({}) @@ -222,7 +222,7 @@ def test_inner_dtype() -> None: def test_display() -> None: for dtype, _, dtype_str, _ in DTYPES: assert ( - pl.select(dtype.to_dtype_expr().display()).to_series().single() == dtype_str + pl.select(dtype.to_dtype_expr().display()).to_series().item() == dtype_str ) @@ -320,7 +320,7 @@ def test_default_value_int(dtype: pl.DataType, numeric_to_one: bool) -> None: dtype.to_dtype_expr().default_value(numeric_to_one=numeric_to_one) ).to_series() assert result.dtype == dtype - assert result.single() == (1 if numeric_to_one else 0) + assert result.item() == (1 if numeric_to_one else 0) @pytest.mark.parametrize("dtype", sorted(FLOAT_DTYPES, key=lambda v: str(v))) @@ -330,32 +330,32 @@ def test_default_value_float(dtype: pl.DataType, numeric_to_one: bool) -> None: dtype.to_dtype_expr().default_value(numeric_to_one=numeric_to_one) ).to_series() assert result.dtype == dtype - assert result.single() == (1.0 if numeric_to_one else 0.0) + assert result.item() == (1.0 if numeric_to_one else 0.0) def test_default_value_string() -> None: result = pl.select(pl.String().to_dtype_expr().default_value()).to_series() assert result.dtype == pl.String() - assert result.single() == "" + assert result.item() == "" def test_default_value_binary() -> None: result = pl.select(pl.String().to_dtype_expr().default_value()).to_series() assert result.dtype == pl.String() - assert result.single() == "" + assert result.item() == "" def test_default_value_decimal() -> None: result = pl.select(pl.Decimal(scale=2).to_dtype_expr().default_value()).to_series() assert result.dtype == pl.Decimal(scale=2) - assert result.single() == 0 + assert result.item() == 0 @pytest.mark.parametrize("dtype", sorted(TEMPORAL_DTYPES, key=lambda v: str(v))) def 
test_default_value_temporal(dtype: pl.DataType) -> None: result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.to_physical().single() == 0 + assert result.to_physical().item() == 0 @pytest.mark.parametrize("numeric_to_one", [False, True]) @@ -395,33 +395,33 @@ def test_default_value_object() -> None: dtype = pl.Object() result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.single() is None + assert result.item() is None def test_default_value_null() -> None: dtype = pl.Null() result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.single() is None + assert result.item() is None def test_default_value_categorical() -> None: dtype = pl.Categorical() result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.single() is None + assert result.item() is None def test_default_value_enum() -> None: dtype = pl.Enum([]) result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.single() is None + assert result.item() is None dtype = pl.Enum(["a", "b", "c"]) result = pl.select(dtype.to_dtype_expr().default_value()).to_series() assert result.dtype == dtype - assert result.single() == "a" + assert result.item() == "a" @pytest.mark.parametrize("n", [0, 1, 2, 5]) diff --git a/py-polars/tests/unit/test_datatypes.py b/py-polars/tests/unit/test_datatypes.py index b1bb3dc41abd..e30cd107f0d1 100644 --- a/py-polars/tests/unit/test_datatypes.py +++ b/py-polars/tests/unit/test_datatypes.py @@ -230,5 +230,5 @@ def test_max_min( lower: int | float | time, ) -> None: df = pl.select(min=dtype.min(), max=dtype.max()) - assert df.to_series(0).single() == lower - assert df.to_series(1).single() == upper + assert df.to_series(0).item() == lower + assert df.to_series(1).item() == upper diff --git 
a/py-polars/tests/unit/test_expansion.py b/py-polars/tests/unit/test_expansion.py index 464042ad94b5..d4bb05440dab 100644 --- a/py-polars/tests/unit/test_expansion.py +++ b/py-polars/tests/unit/test_expansion.py @@ -80,8 +80,8 @@ def test_struct_name_resolving_15430() -> None: .collect(optimizations=pl.QueryOptFlags(projection_pushdown=True)) ) - assert a["b"].single() == "c" - assert b["b"].single() == "c" + assert a["b"].item() == "c" + assert b["b"].item() == "c" assert a.columns == ["b"] assert b.columns == ["b"] diff --git a/py-polars/tests/unit/test_format.py b/py-polars/tests/unit/test_format.py index 48e3a5b2ea89..e0247a847dc3 100644 --- a/py-polars/tests/unit/test_format.py +++ b/py-polars/tests/unit/test_format.py @@ -509,4 +509,4 @@ def test_format_ascii_table_truncation(df: pl.DataFrame, expected: str) -> None: def test_format_21393() -> None: - assert pl.select(pl.format("{}", pl.lit(1, pl.Int128))).single() == "1" + assert pl.select(pl.format("{}", pl.lit(1, pl.Int128))).item() == "1" diff --git a/py-polars/tests/unit/test_polars_import.py b/py-polars/tests/unit/test_polars_import.py index 60d3bf40f731..798257cb653a 100644 --- a/py-polars/tests/unit/test_polars_import.py +++ b/py-polars/tests/unit/test_polars_import.py @@ -20,7 +20,7 @@ def _import_time_from_frame(tm: pl.DataFrame) -> int: return int( tm.filter(pl.col("import").str.strip_chars() == "polars") .select("cumulative_time") - .single() + .item() ) diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py index a9271a557fb0..84f7e3a72085 100644 --- a/py-polars/tests/unit/test_projections.py +++ b/py-polars/tests/unit/test_projections.py @@ -483,7 +483,7 @@ def test_non_coalesce_join_projection_pushdown_16515( left.join(right, how=join_type, left_on="x", right_on="y", coalesce=False) .select("y") .collect() - .single() + .item() == 1 ) @@ -645,7 +645,7 @@ def test_select_len_20337() -> None: ) q = q.with_row_index("foo") - assert 
q.select(pl.len()).collect().single() == 3 + assert q.select(pl.len()).collect().item() == 3 def test_filter_count_projection_20902() -> None: diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py index 5e944c230824..a3f30bf5b567 100644 --- a/py-polars/tests/unit/test_row_encoding.py +++ b/py-polars/tests/unit/test_row_encoding.py @@ -122,8 +122,8 @@ def test_bool(field: Any) -> None: @pytest.mark.parametrize("dtype", INTEGER_DTYPES) @pytest.mark.parametrize("field", FIELD_COMBS_ARGS) def test_int(dtype: pl.DataType, field: Any) -> None: - min = pl.select(x=dtype.min()).single() # type: ignore[attr-defined] - max = pl.select(x=dtype.max()).single() # type: ignore[attr-defined] + min = pl.select(x=dtype.min()).item() # type: ignore[attr-defined] + max = pl.select(x=dtype.max()).item() # type: ignore[attr-defined] roundtrip_series_re([], dtype, **field) roundtrip_series_re([0], dtype, **field) diff --git a/py-polars/tests/unit/test_scalar.py b/py-polars/tests/unit/test_scalar.py index 8ad043c6e1c6..9e51375a8aa9 100644 --- a/py-polars/tests/unit/test_scalar.py +++ b/py-polars/tests/unit/test_scalar.py @@ -62,7 +62,7 @@ def test_scalar_len_20046() -> None: ) .select(pl.len()) .collect() - .single() + .item() == 3 ) @@ -71,7 +71,7 @@ def test_scalar_len_20046() -> None: pl.col("a").alias("b"), ) - assert q.select(pl.len()).collect().single() == 3 + assert q.select(pl.len()).collect().item() == 3 def test_scalar_identification_function_expr_in_binary() -> None: diff --git a/py-polars/tests/unit/test_selectors.py b/py-polars/tests/unit/test_selectors.py index a2cfd80ae32f..070a808e9d9a 100644 --- a/py-polars/tests/unit/test_selectors.py +++ b/py-polars/tests/unit/test_selectors.py @@ -993,7 +993,7 @@ def test_expand_more_than_one_22567() -> None: assert ( pl.select(x=1, y=2) .select(cs.by_name("x").as_expr() + cs.by_name("y").as_expr()) - .single() + .item() == 3 ) @@ -1028,25 +1028,25 @@ def test_selector_arith_dtypes_12850() 
-> None: assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() - cs.integer().as_expr()) - .single() + .item() == 1.0 ) assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() + cs.integer().as_expr()) - .single() + .item() == 3.0 ) assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() - cs.last().as_expr()) - .single() + .item() == 1.0 ) assert ( pl.DataFrame({"a": [2.0], "b": [1]}) .select(cs.float().as_expr() - cs.by_name("b").as_expr()) - .single() + .item() == 1.0 ) From 414c3d65254fa6e779e03e6062bda20e42ff47bb Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 17:23:00 +0200 Subject: [PATCH 15/24] Revert "Deprecate {DataFrame,Series}.item() in favor of .single()" This reverts commit 1a8570207a6d3cf79fd53ba00c47efc404fb139f. --- py-polars/src/polars/dataframe/frame.py | 36 ++++++------------------- py-polars/src/polars/series/series.py | 34 +++++------------------ 2 files changed, 15 insertions(+), 55 deletions(-) diff --git a/py-polars/src/polars/dataframe/frame.py b/py-polars/src/polars/dataframe/frame.py index f1d09f36d4d2..d543242e3e28 100644 --- a/py-polars/src/polars/dataframe/frame.py +++ b/py-polars/src/polars/dataframe/frame.py @@ -1647,11 +1647,6 @@ def collect_schema(self) -> Schema: """ return self.schema - @deprecated( - "`DataFrame.item` is deprecated; " - "for unpacking a single value out of a dataframe as a scalar, use `DataFrame.single()`; " - "for element retrieval, use `Dataframe[row, col]` instead; " - ) def item(self, row: int | None = None, column: int | str | None = None) -> Any: """ Return the DataFrame as a scalar, or return the element at the given row/column. 
@@ -1683,7 +1678,14 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: 6 """ if row is None and column is None: - return self.single() + if self.shape != (1, 1): + msg = ( + "can only call `.item()` if the dataframe is of shape (1, 1)," + " or if explicit row/col values are provided;" + f" frame has shape {self.shape!r}" + ) + raise ValueError(msg) + return self._df.to_series(0).get_index(0) elif row is None or column is None: msg = "cannot call `.item()` with only one of `row` or `column`" @@ -1696,28 +1698,6 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - @unstable() - def single(self) -> Any: - """ - Return the single value in a 1x1 DataFrame as a scalar. - - This is equivalent to `df[0,0]`, with a check that the shape is (1,1). - - Examples - -------- - >>> df = pl.DataFrame({"a": [42]}) - >>> df.single() - 42 - """ - if self.shape != (1, 1): - msg = ( - "can only call `.single()` if the dataframe is of shape (1, 1)," - " or if explicit row/col values are provided;" - f" frame has shape {self.shape!r}" - ) - raise ValueError(msg) - return self._df.to_series(0).get_index(0) - @deprecate_renamed_parameter("future", "compat_level", version="1.1") def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Table: """ diff --git a/py-polars/src/polars/series/series.py b/py-polars/src/polars/series/series.py index cac137da4303..8eb81acb8a96 100644 --- a/py-polars/src/polars/series/series.py +++ b/py-polars/src/polars/series/series.py @@ -1608,11 +1608,6 @@ def _repr_html_(self) -> str: """Format output data in HTML for display in Jupyter Notebooks.""" return self.to_frame()._repr_html_(_from_series=True) - @deprecated( - "`Series.item` is deprecated; " - "for unpacking a single value out of a dataframe as a scalar, use `Series.single()`; " - "for element retrieval, use `Series[index]` instead; " - ) def item(self, index: int | None = None) -> Any: """ Return 
the Series as a scalar, or return the element at the given index. @@ -1630,31 +1625,16 @@ def item(self, index: int | None = None) -> Any: 24 """ if index is None: - return self.single() + if len(self) != 1: + msg = ( + "can only call '.item()' if the Series is of length 1," + f" or an explicit index is provided (Series is of length {len(self)})" + ) + raise ValueError(msg) + return self._s.get_index(0) return self._s.get_index_signed(index) - @unstable() - def single(self) -> Any: - """ - Return the single value in this Series as a scalar. - - This is equivalent to `s[0,0]`, with a check that the series length is 1. - - Examples - -------- - >>> s = pl.Series("a", [42]) - >>> s.single() - 42 - """ - if len(self) != 1: - msg = ( - "can only call '.item()' if the Series is of length 1," - f" or an explicit index is provided (Series is of length {len(self)})" - ) - raise ValueError(msg) - return self._s.get_index(0) - def estimated_size(self, unit: SizeUnit = "b") -> int | float: """ Return an estimation of the total (heap) allocated size of the Series. 
From e029504f65752159ccf96e2d2693a066df06f954 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Thu, 16 Oct 2025 17:33:53 +0200 Subject: [PATCH 16/24] Rename .single() to .item() --- crates/polars-core/src/frame/group_by/mod.rs | 6 +-- .../src/expressions/aggregation.rs | 12 +++--- crates/polars-expr/src/planner.rs | 2 +- crates/polars-expr/src/reduce/convert.rs | 6 +-- .../{first_last_single.rs => first_last.rs} | 38 +++++++++---------- crates/polars-expr/src/reduce/mod.rs | 2 +- crates/polars-plan/src/dsl/expr/mod.rs | 4 +- crates/polars-plan/src/dsl/format.rs | 2 +- crates/polars-plan/src/dsl/mod.rs | 4 +- .../polars-plan/src/plans/aexpr/equality.rs | 2 +- crates/polars-plan/src/plans/aexpr/mod.rs | 4 +- crates/polars-plan/src/plans/aexpr/schema.rs | 4 +- .../polars-plan/src/plans/aexpr/traverse.rs | 34 ++++++++--------- .../conversion/dsl_to_ir/expr_expansion.rs | 4 +- .../plans/conversion/dsl_to_ir/expr_to_ir.rs | 4 +- .../src/plans/conversion/ir_to_dsl.rs | 4 +- crates/polars-plan/src/plans/ir/format.rs | 2 +- crates/polars-plan/src/plans/iterator.rs | 2 +- .../optimizer/set_order/expr_pushdown.rs | 2 +- crates/polars-plan/src/plans/visitor/expr.rs | 2 +- crates/polars-python/src/expr/general.rs | 4 +- .../src/lazyframe/visitor/expr_nodes.rs | 4 +- .../src/physical_plan/lower_expr.rs | 2 +- .../src/physical_plan/lower_group_by.rs | 2 +- py-polars/src/polars/_plr.pyi | 2 +- py-polars/src/polars/expr/expr.py | 6 +-- py-polars/src/polars/expr/list.py | 28 +++++++------- py-polars/src/polars/series/list.py | 4 +- .../aggregation/test_aggregations.py | 12 +++--- .../operations/namespaces/list/test_list.py | 2 +- .../tests/unit/operations/test_group_by.py | 4 +- py-polars/tests/unit/test_cse.py | 4 +- py-polars/tests/unit/test_schema.py | 8 ++-- 33 files changed, 110 insertions(+), 112 deletions(-) rename crates/polars-expr/src/reduce/{first_last_single.rs => first_last.rs} (93%) diff --git a/crates/polars-core/src/frame/group_by/mod.rs 
b/crates/polars-core/src/frame/group_by/mod.rs index 45fec60deaab..6955de9811ee 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -875,7 +875,7 @@ pub enum GroupByMethod { Mean, First, Last, - Single, + Item, Sum, Groups, NUnique, @@ -898,7 +898,7 @@ impl Display for GroupByMethod { Mean => "mean", First => "first", Last => "last", - Single => "single", + Item => "item", Sum => "sum", Groups => "groups", NUnique => "n_unique", @@ -924,7 +924,7 @@ pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { Mean => format_pl_smallstr!("{name}_mean"), First => format_pl_smallstr!("{name}_first"), Last => format_pl_smallstr!("{name}_last"), - Single => format_pl_smallstr!("{name}_single"), + Item => format_pl_smallstr!("{name}_item"), Sum => format_pl_smallstr!("{name}_sum"), Groups => PlSmallStr::from_static("groups"), NUnique => format_pl_smallstr!("{name}_n_unique"), diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 2d814a86b69d..e6a4c1f0b9b5 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -130,16 +130,16 @@ impl PhysicalExpr for AggregationExpr { } else { s.tail(Some(1)) }), - GroupByMethod::Single => Ok(match s.len() { + GroupByMethod::Item => Ok(match s.len() { 0 => { return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got none" + "aggregation 'item' expected a single value, got none" )); }, 1 => s.slice(0, 1), n => { return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got {n} values" + "aggregation 'item' expected a single value, got {n} values" )); }, }), @@ -350,21 +350,21 @@ impl PhysicalExpr for AggregationExpr { let agg_s = s.agg_last(&groups); AggregatedScalar(agg_s.with_name(keep_name)) }, - GroupByMethod::Single => { + GroupByMethod::Item => { let (s, groups) = 
ac.get_final_aggregation(); for gc in groups.group_count().iter() { if let Some(n) = gc && n == 0 { return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got none" + "aggregation 'item' expected a single value, got none" )); } if let Some(n) = gc && n > 1 { return Err(polars_err!(ComputeError: - "aggregation 'single' expected a single value, got {n} values" + "aggregation 'item' expected a single value, got {n} values" )); } } diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index b604d6590c71..2511e595d1d4 100644 --- a/crates/polars-expr/src/planner.rs +++ b/crates/polars-expr/src/planner.rs @@ -378,7 +378,7 @@ fn create_physical_expr_inner( I::NUnique(_) => GBM::NUnique, I::First(_) => GBM::First, I::Last(_) => GBM::Last, - I::Single(_) => GBM::Single, + I::Item(_) => GBM::Item, I::Mean(_) => GBM::Mean, I::Implode(_) => GBM::Implode, I::Quantile { .. } => unreachable!(), diff --git a/crates/polars-expr/src/reduce/convert.rs b/crates/polars-expr/src/reduce/convert.rs index e717fbfa6688..7f29be1cedf2 100644 --- a/crates/polars-expr/src/reduce/convert.rs +++ b/crates/polars-expr/src/reduce/convert.rs @@ -11,9 +11,7 @@ use crate::reduce::bitwise::{ new_bitwise_and_reduction, new_bitwise_or_reduction, new_bitwise_xor_reduction, }; use crate::reduce::count::{CountReduce, NullCountReduce}; -use crate::reduce::first_last_single::{ - new_first_reduction, new_last_reduction, new_single_reduction, -}; +use crate::reduce::first_last::{new_first_reduction, new_item_reduction, new_last_reduction}; use crate::reduce::len::LenReduce; use crate::reduce::mean::new_mean_reduction; use crate::reduce::min_max::{new_max_reduction, new_min_reduction}; @@ -53,7 +51,7 @@ pub fn into_reduction( }, IRAggExpr::First(input) => (new_first_reduction(get_dt(*input)?), *input), IRAggExpr::Last(input) => (new_last_reduction(get_dt(*input)?), *input), - IRAggExpr::Single(input) => (new_single_reduction(get_dt(*input)?), *input), + 
IRAggExpr::Item(input) => (new_item_reduction(get_dt(*input)?), *input), IRAggExpr::Count { input, include_nulls, diff --git a/crates/polars-expr/src/reduce/first_last_single.rs b/crates/polars-expr/src/reduce/first_last.rs similarity index 93% rename from crates/polars-expr/src/reduce/first_last_single.rs rename to crates/polars-expr/src/reduce/first_last.rs index d1cbd2a89c0c..277481dda1ed 100644 --- a/crates/polars-expr/src/reduce/first_last_single.rs +++ b/crates/polars-expr/src/reduce/first_last.rs @@ -15,8 +15,8 @@ pub fn new_last_reduction(dtype: DataType) -> Box { new_reduction_with_policy::(dtype) } -pub fn new_single_reduction(dtype: DataType) -> Box { - new_reduction_with_policy::(dtype) +pub fn new_item_reduction(dtype: DataType) -> Box { + new_reduction_with_policy::(dtype) } fn new_reduction_with_policy(dtype: DataType) -> Box { @@ -47,7 +47,7 @@ fn new_reduction_with_policy(dtype: DataType) -> Box usize; fn should_replace(new: u64, old: u64) -> bool; - fn is_single() -> bool { + fn is_item_policy() -> bool { false } } @@ -76,8 +76,8 @@ impl Policy for Last { } } -struct Single; -impl Policy for Single { +struct Item; +impl Policy for Item { fn index(_len: usize) -> usize { 0 } @@ -86,7 +86,7 @@ impl Policy for Single { old == 0 } - fn is_single() -> bool { + fn is_item_policy() -> bool { true } } @@ -154,8 +154,8 @@ where dtype: &DataType, ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. - if P::is_single() { - check_single_value(&v)?; + if P::is_item_policy() { + check_item_count_is_one(&v)?; } let ca: ChunkedArray = v .into_iter() @@ -230,8 +230,8 @@ where dtype: &DataType, ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. 
- if P::is_single() { - check_single_value(&v)?; + if P::is_item_policy() { + check_item_count_is_one(&v)?; } let ca: BinaryChunked = v .into_iter() @@ -291,8 +291,8 @@ where _dtype: &DataType, ) -> PolarsResult { assert!(m.is_none()); // This should only be used with VecGroupedReduction. - if P::is_single() { - check_single_value(&v)?; + if P::is_item_policy() { + check_item_count_is_one(&v)?; } let ca: BooleanChunked = v .into_iter() @@ -431,10 +431,10 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< fn finalize(&mut self) -> PolarsResult { self.seqs.clear(); - if P::is_single() { + if P::is_item_policy() { for count in self.counts.iter() { if *count != 1 { - return Err(single_count_err(*count)); + return Err(item_count_err(*count)); } } } @@ -452,22 +452,22 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< } } -fn check_single_value(v: &[Value]) -> PolarsResult<()> { +fn check_item_count_is_one(v: &[Value]) -> PolarsResult<()> { if let Some(Value { count: n, .. 
}) = v.iter().find(|v| v.count != 1) { - Err(single_count_err(*n)) + Err(item_count_err(*n)) } else { Ok(()) } } -fn single_count_err(n: u64) -> PolarsError { +fn item_count_err(n: u64) -> PolarsError { if n == 0 { polars_err!(ComputeError: - "aggregation 'single' expected a single value, got none" + "aggregation 'item' expected a single value, got none" ) } else if n > 1 { polars_err!(ComputeError: - "aggregation 'single' expected a single value, got {n} values" + "aggregation 'item' expected a single value, got {n} values" ) } else { unreachable!() diff --git a/crates/polars-expr/src/reduce/mod.rs b/crates/polars-expr/src/reduce/mod.rs index 8820166e3ca5..4d58c62f515d 100644 --- a/crates/polars-expr/src/reduce/mod.rs +++ b/crates/polars-expr/src/reduce/mod.rs @@ -6,7 +6,7 @@ mod approx_n_unique; mod bitwise; mod convert; mod count; -mod first_last_single; +mod first_last; mod len; mod mean; mod min_max; diff --git a/crates/polars-plan/src/dsl/expr/mod.rs b/crates/polars-plan/src/dsl/expr/mod.rs index 68550b3f8582..90c8713429ab 100644 --- a/crates/polars-plan/src/dsl/expr/mod.rs +++ b/crates/polars-plan/src/dsl/expr/mod.rs @@ -37,7 +37,7 @@ pub enum AggExpr { NUnique(Arc), First(Arc), Last(Arc), - Single(Arc), + Item(Arc), Mean(Arc), Implode(Arc), Count { @@ -65,7 +65,7 @@ impl AsRef for AggExpr { NUnique(e) => e, First(e) => e, Last(e) => e, - Single(e) => e, + Item(e) => e, Mean(e) => e, Implode(e) => e, Count { input, .. 
} => input, diff --git a/crates/polars-plan/src/dsl/format.rs b/crates/polars-plan/src/dsl/format.rs index 18c89a3984ec..aecca2d16cc6 100644 --- a/crates/polars-plan/src/dsl/format.rs +++ b/crates/polars-plan/src/dsl/format.rs @@ -113,7 +113,7 @@ impl fmt::Debug for Expr { Mean(expr) => write!(f, "{expr:?}.mean()"), First(expr) => write!(f, "{expr:?}.first()"), Last(expr) => write!(f, "{expr:?}.last()"), - Single(expr) => write!(f, "{expr:?}.item()"), + Item(expr) => write!(f, "{expr:?}.item()"), Implode(expr) => write!(f, "{expr:?}.list()"), NUnique(expr) => write!(f, "{expr:?}.n_unique()"), Sum(expr) => write!(f, "{expr:?}.sum()"), diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index e0278f99f345..fd1c7610c73a 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -177,8 +177,8 @@ impl Expr { } /// Get the single value in the group. If there are multiple values, an error is returned. - pub fn single(self) -> Self { - AggExpr::Single(Arc::new(self)).into() + pub fn item(self) -> Self { + AggExpr::Item(Arc::new(self)).into() } /// GroupBy the group to a Series. 
diff --git a/crates/polars-plan/src/plans/aexpr/equality.rs b/crates/polars-plan/src/plans/aexpr/equality.rs index a9b3da8cff7c..9565e5d2dd17 100644 --- a/crates/polars-plan/src/plans/aexpr/equality.rs +++ b/crates/polars-plan/src/plans/aexpr/equality.rs @@ -111,7 +111,7 @@ impl IRAggExpr { A::NUnique(_) | A::First(_) | A::Last(_) | - A::Single(_) | + A::Item(_) | A::Mean(_) | A::Implode(_) | A::Sum(_) | diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 0c4f7d7d4c7c..78a42036dcb8 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -48,7 +48,7 @@ pub enum IRAggExpr { NUnique(Node), First(Node), Last(Node), - Single(Node), + Item(Node), Mean(Node), Implode(Node), Quantile { @@ -147,7 +147,7 @@ impl From for GroupByMethod { NUnique(_) => GroupByMethod::NUnique, First(_) => GroupByMethod::First, Last(_) => GroupByMethod::Last, - Single(_) => GroupByMethod::Single, + Item(_) => GroupByMethod::Item, Mean(_) => GroupByMethod::Mean, Implode(_) => GroupByMethod::Implode, Sum(_) => GroupByMethod::Sum, diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index e92b16e12e5d..21d36aa05f5b 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -140,7 +140,7 @@ impl AExpr { | Min { input: expr, .. } | First(expr) | Last(expr) - | Single(expr) => ctx.arena.get(*expr).to_field_impl(ctx), + | Item(expr) => ctx.arena.get(*expr).to_field_impl(ctx), Sum(expr) => { let mut field = ctx.arena.get(*expr).to_field_impl(ctx)?; let dt = match field.dtype() { @@ -320,7 +320,7 @@ impl AExpr { | Agg(Min { input: expr, .. 
}) | Agg(First(expr)) | Agg(Last(expr)) - | Agg(Single(expr)) + | Agg(Item(expr)) | Agg(Sum(expr)) | Agg(Median(expr)) | Agg(Mean(expr)) diff --git a/crates/polars-plan/src/plans/aexpr/traverse.rs b/crates/polars-plan/src/plans/aexpr/traverse.rs index 545fe8d0359e..64a75980127e 100644 --- a/crates/polars-plan/src/plans/aexpr/traverse.rs +++ b/crates/polars-plan/src/plans/aexpr/traverse.rs @@ -244,24 +244,24 @@ impl AExpr { impl IRAggExpr { pub fn get_input(&self) -> NodeInputs { use IRAggExpr::*; - use NodeInputs as NI; + use NodeInputs::*; match self { - Min { input, .. } => NI::Single(*input), - Max { input, .. } => NI::Single(*input), - Median(input) => NI::Single(*input), - NUnique(input) => NI::Single(*input), - First(input) => NI::Single(*input), - Last(input) => NI::Single(*input), - Single(input) => NI::Single(*input), - Mean(input) => NI::Single(*input), - Implode(input) => NI::Single(*input), - Quantile { expr, quantile, .. } => NI::Many(vec![*expr, *quantile]), - Sum(input) => NI::Single(*input), - Count { input, .. } => NI::Single(*input), - Std(input, _) => NI::Single(*input), - Var(input, _) => NI::Single(*input), - AggGroups(input) => NI::Single(*input), + Min { input, .. } => Single(*input), + Max { input, .. } => Single(*input), + Median(input) => Single(*input), + NUnique(input) => Single(*input), + First(input) => Single(*input), + Last(input) => Single(*input), + Item(input) => Single(*input), + Mean(input) => Single(*input), + Implode(input) => Single(*input), + Quantile { expr, quantile, .. } => Many(vec![*expr, *quantile]), + Sum(input) => Single(*input), + Count { input, .. 
} => Single(*input), + Std(input, _) => Single(*input), + Var(input, _) => Single(*input), + AggGroups(input) => Single(*input), } } pub fn set_input(&mut self, input: Node) { @@ -273,7 +273,7 @@ impl IRAggExpr { NUnique(input) => input, First(input) => input, Last(input) => input, - Single(input) => input, + Item(input) => input, Mean(input) => input, Implode(input) => input, Quantile { expr, .. } => expr, diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs index feadd2b90a99..9f72f8315138 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_expansion.rs @@ -467,13 +467,13 @@ fn expand_expression_rec( opt_flags, |e| Expr::Agg(AggExpr::Last(Arc::new(e))), )?, - AggExpr::Single(expr) => expand_single( + AggExpr::Item(expr) => expand_single( expr.as_ref(), ignored_selector_columns, schema, out, opt_flags, - |e| Expr::Agg(AggExpr::Single(Arc::new(e))), + |e| Expr::Agg(AggExpr::Item(Arc::new(e))), )?, AggExpr::Mean(expr) => expand_single( expr.as_ref(), diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs index 4bae9d92e57c..9a5441693779 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/expr_to_ir.rs @@ -253,9 +253,9 @@ pub(super) fn to_aexpr_impl( let (input, output_name) = to_aexpr_mat_lit_arc!(input)?; (IRAggExpr::Last(input), output_name) }, - AggExpr::Single(input) => { + AggExpr::Item(input) => { let (input, output_name) = to_aexpr_mat_lit_arc!(input)?; - (IRAggExpr::Single(input), output_name) + (IRAggExpr::Item(input), output_name) }, AggExpr::Mean(input) => { let (input, output_name) = to_aexpr_mat_lit_arc!(input)?; diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs 
b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index 53f9577cdff8..dc4558e087af 100644 --- a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -122,9 +122,9 @@ pub fn node_to_expr(node: Node, expr_arena: &Arena) -> Expr { let exp = node_to_expr(expr, expr_arena); AggExpr::Last(Arc::new(exp)).into() }, - IRAggExpr::Single(expr) => { + IRAggExpr::Item(expr) => { let exp = node_to_expr(expr, expr_arena); - AggExpr::Single(Arc::new(exp)).into() + AggExpr::Item(Arc::new(exp)).into() }, IRAggExpr::Implode(expr) => { let exp = node_to_expr(expr, expr_arena); diff --git a/crates/polars-plan/src/plans/ir/format.rs b/crates/polars-plan/src/plans/ir/format.rs index 110e45815d30..fd88daa4096b 100644 --- a/crates/polars-plan/src/plans/ir/format.rs +++ b/crates/polars-plan/src/plans/ir/format.rs @@ -452,7 +452,7 @@ impl Display for ExprIRDisplay<'_> { Mean(expr) => write!(f, "{}.mean()", self.with_root(expr)), First(expr) => write!(f, "{}.first()", self.with_root(expr)), Last(expr) => write!(f, "{}.last()", self.with_root(expr)), - Single(expr) => write!(f, "{}.single()", self.with_root(expr)), + Item(expr) => write!(f, "{}.item()", self.with_root(expr)), Implode(expr) => write!(f, "{}.implode()", self.with_root(expr)), NUnique(expr) => write!(f, "{}.n_unique()", self.with_root(expr)), Sum(expr) => write!(f, "{}.sum()", self.with_root(expr)), diff --git a/crates/polars-plan/src/plans/iterator.rs b/crates/polars-plan/src/plans/iterator.rs index f088da077f30..31db4d1458f8 100644 --- a/crates/polars-plan/src/plans/iterator.rs +++ b/crates/polars-plan/src/plans/iterator.rs @@ -48,7 +48,7 @@ macro_rules! push_expr { NUnique(e) => $push($c, e), First(e) => $push($c, e), Last(e) => $push($c, e), - Single(e) => $push($c, e), + Item(e) => $push($c, e), Implode(e) => $push($c, e), Count { input, .. } => $push($c, input), Quantile { expr, .. 
} => $push($c, expr), diff --git a/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs b/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs index 038150108c52..19f000e0a763 100644 --- a/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs +++ b/crates/polars-plan/src/plans/optimizer/set_order/expr_pushdown.rs @@ -205,7 +205,7 @@ fn get_frame_observing_impl( | IRAggExpr::Count { input: node, .. } | IRAggExpr::Std(node, _) | IRAggExpr::Var(node, _) - | IRAggExpr::Single(node) => { + | IRAggExpr::Item(node) => { // Input order is deregarded, but must not observe order. _ = rec!(*node); O::None diff --git a/crates/polars-plan/src/plans/visitor/expr.rs b/crates/polars-plan/src/plans/visitor/expr.rs index 6f148799e534..fbe3dd689f83 100644 --- a/crates/polars-plan/src/plans/visitor/expr.rs +++ b/crates/polars-plan/src/plans/visitor/expr.rs @@ -60,7 +60,7 @@ impl TreeWalker for Expr { NUnique(x) => NUnique(am(x, f)?), First(x) => First(am(x, f)?), Last(x) => Last(am(x, f)?), - Single(x) => Single(am(x, f)?), + Item(x) => Item(am(x, f)?), Mean(x) => Mean(am(x, f)?), Implode(x) => Implode(am(x, f)?), Count { input, include_nulls } => Count { input: am(input, f)?, include_nulls }, diff --git a/crates/polars-python/src/expr/general.rs b/crates/polars-python/src/expr/general.rs index 188e3a1b62c3..398f8e7231a8 100644 --- a/crates/polars-python/src/expr/general.rs +++ b/crates/polars-python/src/expr/general.rs @@ -152,8 +152,8 @@ impl PyExpr { fn last(&self) -> Self { self.inner.clone().last().into() } - fn single(&self) -> Self { - self.inner.clone().single().into() + fn item(&self) -> Self { + self.inner.clone().item().into() } fn implode(&self) -> Self { self.inner.clone().implode().into() diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index d7481ecbaa67..b674cf09005b 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ 
b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -684,8 +684,8 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult> { arguments: vec![n.0], options: py.None(), }, - IRAggExpr::Single(n) => Agg { - name: "single".into_py_any(py)?, + IRAggExpr::Item(n) => Agg { + name: "item".into_py_any(py)?, arguments: vec![n.0], options: py.None(), }, diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs index e1c84a36a65c..c0070a901f30 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -1661,7 +1661,7 @@ fn lower_exprs_with_ctx( | IRAggExpr::Max { .. } | IRAggExpr::First(_) | IRAggExpr::Last(_) - | IRAggExpr::Single(_) + | IRAggExpr::Item(_) | IRAggExpr::Sum(_) | IRAggExpr::Mean(_) | IRAggExpr::Var { .. } diff --git a/crates/polars-stream/src/physical_plan/lower_group_by.rs b/crates/polars-stream/src/physical_plan/lower_group_by.rs index c0d6cb48c9b9..8a8c6b29b1f8 100644 --- a/crates/polars-stream/src/physical_plan/lower_group_by.rs +++ b/crates/polars-stream/src/physical_plan/lower_group_by.rs @@ -312,7 +312,7 @@ fn try_lower_elementwise_scalar_agg_expr( | IRAggExpr::Max { .. } | IRAggExpr::First(_) | IRAggExpr::Last(_) - | IRAggExpr::Single(_) + | IRAggExpr::Item(_) | IRAggExpr::Mean(_) | IRAggExpr::Sum(_) | IRAggExpr::Var(..) diff --git a/py-polars/src/polars/_plr.pyi b/py-polars/src/polars/_plr.pyi index 34243fe56f50..b76212fc47c9 100644 --- a/py-polars/src/polars/_plr.pyi +++ b/py-polars/src/polars/_plr.pyi @@ -1184,7 +1184,7 @@ class PyExpr: def unique_stable(self) -> PyExpr: ... def first(self) -> PyExpr: ... def last(self) -> PyExpr: ... - def single(self) -> PyExpr: ... + def item(self) -> PyExpr: ... def implode(self) -> PyExpr: ... def quantile(self, quantile: PyExpr, interpolation: Any) -> PyExpr: ... 
def cut( diff --git a/py-polars/src/polars/expr/expr.py b/py-polars/src/polars/expr/expr.py index 544b496041a4..67050f10ecfb 100644 --- a/py-polars/src/polars/expr/expr.py +++ b/py-polars/src/polars/expr/expr.py @@ -3444,7 +3444,7 @@ def last(self) -> Expr: return wrap_expr(self._pyexpr.last()) @unstable() - def single(self) -> Expr: + def item(self) -> Expr: """ Get the single value. @@ -3453,7 +3453,7 @@ def single(self) -> Expr: Examples -------- >>> df = pl.DataFrame({"a": [1]}) - >>> df.select(pl.col("a").single()) + >>> df.select(pl.col("a").item()) shape: (1, 1) ┌─────┐ │ a │ @@ -3463,7 +3463,7 @@ def single(self) -> Expr: │ 1 │ └─────┘ """ - return wrap_expr(self._pyexpr.single()) + return wrap_expr(self._pyexpr.item()) def over( self, diff --git a/py-polars/src/polars/expr/list.py b/py-polars/src/polars/expr/list.py index a1588dc6dbd4..0e16aa79be35 100644 --- a/py-polars/src/polars/expr/list.py +++ b/py-polars/src/polars/expr/list.py @@ -685,7 +685,7 @@ def last(self) -> Expr: return self.get(-1, null_on_oob=True) @unstable() - def single(self) -> Expr: + def item(self) -> Expr: """ Get the single value of the sublists. @@ -694,24 +694,24 @@ def single(self) -> Expr: Examples -------- >>> df = pl.DataFrame({"a": [[3], [1], [2]]}) - >>> df.with_columns(single=pl.col("a").list.single()) + >>> df.with_columns(item=pl.col("a").list.item()) shape: (3, 2) - ┌───────────┬────────┐ - │ a ┆ single │ - │ --- ┆ --- │ - │ list[i64] ┆ i64 │ - ╞═══════════╪════════╡ - │ [3] ┆ 3 │ - │ [1] ┆ 1 │ - │ [2] ┆ 2 │ - └───────────┴────────┘ + ┌───────────┬──────┐ + │ a ┆ item │ + │ --- ┆ --- │ + │ list[i64] ┆ i64 │ + ╞═══════════╪══════╡ + │ [3] ┆ 3 │ + │ [1] ┆ 1 │ + │ [2] ┆ 2 │ + └───────────┴──────┘ >>> df = pl.DataFrame({"a": [[3, 2], [1], [2]]}) - >>> df.select(pl.col("a").list.single()) + >>> df.select(pl.col("a").list.item()) Traceback (most recent call last): ... 
- polars.exceptions.ComputeError: aggregation 'single' expected a single value, got 2 values + polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 2 values """ # noqa: W505 - return self.agg(F.element().single()) + return self.agg(F.element().item()) def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr: """ diff --git a/py-polars/src/polars/series/list.py b/py-polars/src/polars/series/list.py index c2f52c881235..d0e573dc10d5 100644 --- a/py-polars/src/polars/series/list.py +++ b/py-polars/src/polars/series/list.py @@ -572,7 +572,7 @@ def last(self) -> Series: """ @unstable() - def single(self) -> Series: + def item(self) -> Series: """ Get the single value of the sublists. @@ -581,7 +581,7 @@ def single(self) -> Series: Examples -------- >>> s = pl.Series("a", [[1], [4], [6]]) - >>> s.list.single() + >>> s.list.item() shape: (3,) Series: 'a' [i64] [ diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 46bf85cb3520..bfa4ef180f3d 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -954,14 +954,14 @@ def test_invalid_agg_dtypes_should_raise( ) ) def test_single(df: pl.DataFrame) -> None: - q = df.lazy().select(pl.all(ignore_nulls=False).single()) + q = df.lazy().select(pl.all(ignore_nulls=False).item()) assert_frame_equal(q.collect(), df) assert_frame_equal(q.collect(engine="streaming"), df) @given(df=dataframes(max_size=0)) def test_single_empty(df: pl.DataFrame) -> None: - q = df.lazy().select(pl.all().single()) + q = df.lazy().select(pl.all().item()) match = "aggregation 'single' expected a single value, got none" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() @@ -971,7 +971,7 @@ def test_single_empty(df: pl.DataFrame) -> None: @given(df=dataframes(min_size=2)) def test_single_too_many(df: 
pl.DataFrame) -> None: - q = df.lazy().select(pl.all(ignore_nulls=False).single()) + q = df.lazy().select(pl.all(ignore_nulls=False).item()) match = f"aggregation 'single' expected a single value, got {df.height} values" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() @@ -992,14 +992,14 @@ def test_single_too_many(df: pl.DataFrame) -> None: ) def test_single_on_groups(df: pl.DataFrame) -> None: df = df.with_columns(pl.col("col0").alias("key")) - q = df.lazy().group_by("col0").agg(pl.all(ignore_nulls=False).single()) + q = df.lazy().group_by("col0").agg(pl.all(ignore_nulls=False).item()) assert_frame_equal(q.collect(), df) assert_frame_equal(q.collect(engine="streaming"), df) def test_single_on_groups_empty() -> None: df = pl.DataFrame({"col0": [[]]}) - q = df.lazy().select(pl.all().list.single()) + q = df.lazy().select(pl.all().list.item()) match = "aggregation 'single' expected a single value, got none" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() @@ -1009,7 +1009,7 @@ def test_single_on_groups_empty() -> None: def test_single_on_groups_too_many() -> None: df = pl.DataFrame({"col0": [[1, 2, 3]]}) - q = df.lazy().select(pl.all().list.single()) + q = df.lazy().select(pl.all().list.item()) match = "aggregation 'single' expected a single value, got 3 values" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py index bb30f45a33ff..1dc723d33489 100644 --- a/py-polars/tests/unit/operations/namespaces/list/test_list.py +++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py @@ -52,7 +52,7 @@ def test_list_arr_get() -> None: # Single a = pl.Series("a", [[1], [4], [6]]) expected = pl.Series("a", [1, 4, 6]) - out = a.list.single() + out = a.list.item() assert_series_equal(out, expected) a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]]) diff --git 
a/py-polars/tests/unit/operations/test_group_by.py b/py-polars/tests/unit/operations/test_group_by.py index d6952916c43b..9ac16b6d7a6f 100644 --- a/py-polars/tests/unit/operations/test_group_by.py +++ b/py-polars/tests/unit/operations/test_group_by.py @@ -1032,7 +1032,7 @@ def test_schema_on_agg() -> None: pl.col("b").sum().alias("sum"), pl.col("b").first().alias("first"), pl.col("b").last().alias("last"), - pl.col("b").single().alias("single"), + pl.col("b").item().alias("item"), ) expected_schema = { "a": pl.String, @@ -1041,7 +1041,7 @@ def test_schema_on_agg() -> None: "sum": pl.Int64, "first": pl.Int64, "last": pl.Int64, - "single": pl.Int64, + "item": pl.Int64, } assert result.collect_schema() == expected_schema diff --git a/py-polars/tests/unit/test_cse.py b/py-polars/tests/unit/test_cse.py index 797d619ab302..b89d68ec19d0 100644 --- a/py-polars/tests/unit/test_cse.py +++ b/py-polars/tests/unit/test_cse.py @@ -346,7 +346,7 @@ def test_cse_mixed_window_functions() -> None: pl.col("b").rank().alias("d_rank"), pl.col("b").first().over([pl.col("a")]).alias("b_first"), pl.col("b").last().over([pl.col("a")]).alias("b_last"), - pl.col("b").single().over([pl.col("a")]).alias("b_single"), + pl.col("b").item().over([pl.col("a")]).alias("b_item"), pl.col("b").shift().alias("b_lag_1"), pl.col("b").shift().alias("b_lead_1"), pl.col("c").cum_sum().alias("c_cumsum"), @@ -364,7 +364,7 @@ def test_cse_mixed_window_functions() -> None: "d_rank": [1.0], "b_first": [1], "b_last": [1], - "b_single": [1], + "b_item": [1], "b_lag_1": [None], "b_lead_1": [None], "c_cumsum": [1], diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py index f000eeeed60e..110cece2dfca 100644 --- a/py-polars/tests/unit/test_schema.py +++ b/py-polars/tests/unit/test_schema.py @@ -358,16 +358,16 @@ def test_lazy_agg_to_scalar_schema_19752(lhs: pl.Expr, expr_op: str) -> None: def test_lazy_agg_schema_after_elementwise_19984() -> None: lf = pl.LazyFrame({"a": 1, "b": 1}) - q 
= lf.group_by("a").agg(pl.col("b").single().fill_null(0)) + q = lf.group_by("a").agg(pl.col("b").item().fill_null(0)) assert q.collect_schema() == q.collect().collect_schema() - q = lf.group_by("a").agg(pl.col("b").single().fill_null(0).fill_null(0)) + q = lf.group_by("a").agg(pl.col("b").item().fill_null(0).fill_null(0)) assert q.collect_schema() == q.collect().collect_schema() - q = lf.group_by("a").agg(pl.col("b").single() + 1) + q = lf.group_by("a").agg(pl.col("b").item() + 1) assert q.collect_schema() == q.collect().collect_schema() - q = lf.group_by("a").agg(1 + pl.col("b").single()) + q = lf.group_by("a").agg(1 + pl.col("b").item()) assert q.collect_schema() == q.collect().collect_schema() From 64180dc0badf5df5114c9d8d97d8c829a9ce4714 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 09:21:11 +0200 Subject: [PATCH 17/24] Fix some straggler tests --- .../operations/aggregation/test_aggregations.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index bfa4ef180f3d..8a8f6b64568b 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -962,7 +962,7 @@ def test_single(df: pl.DataFrame) -> None: @given(df=dataframes(max_size=0)) def test_single_empty(df: pl.DataFrame) -> None: q = df.lazy().select(pl.all().item()) - match = "aggregation 'single' expected a single value, got none" + match = "aggregation 'item' expected a single value, got none" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() with pytest.raises(pl.exceptions.ComputeError, match=match): @@ -970,9 +970,9 @@ def test_single_empty(df: pl.DataFrame) -> None: @given(df=dataframes(min_size=2)) -def test_single_too_many(df: pl.DataFrame) -> None: +def test_item_too_many(df: pl.DataFrame) -> None: q = 
df.lazy().select(pl.all(ignore_nulls=False).item()) - match = f"aggregation 'single' expected a single value, got {df.height} values" + match = f"aggregation 'item' expected a single value, got {df.height} values" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() with pytest.raises(pl.exceptions.ComputeError, match=match): @@ -990,27 +990,27 @@ def test_single_too_many(df: pl.DataFrame) -> None: ], ) ) -def test_single_on_groups(df: pl.DataFrame) -> None: +def test_item_on_groups(df: pl.DataFrame) -> None: df = df.with_columns(pl.col("col0").alias("key")) q = df.lazy().group_by("col0").agg(pl.all(ignore_nulls=False).item()) assert_frame_equal(q.collect(), df) assert_frame_equal(q.collect(engine="streaming"), df) -def test_single_on_groups_empty() -> None: +def test_item_on_groups_empty() -> None: df = pl.DataFrame({"col0": [[]]}) q = df.lazy().select(pl.all().list.item()) - match = "aggregation 'single' expected a single value, got none" + match = "aggregation 'item' expected a single value, got none" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect(engine="streaming") -def test_single_on_groups_too_many() -> None: +def test_item_on_groups_too_many() -> None: df = pl.DataFrame({"col0": [[1, 2, 3]]}) q = df.lazy().select(pl.all().list.item()) - match = "aggregation 'single' expected a single value, got 3 values" + match = "aggregation 'item' expected a single value, got 3 values" with pytest.raises(pl.exceptions.ComputeError, match=match): q.collect() with pytest.raises(pl.exceptions.ComputeError, match=match): From 7c349b95319cfb3c71555cb230d53b3d04ece884 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 09:50:39 +0200 Subject: [PATCH 18/24] Add "See Also" (get) sections to item() docs --- .../docs/source/reference/expressions/list.rst | 1 + .../source/reference/expressions/modify_select.rst | 1 + 
py-polars/docs/source/reference/series/list.rst | 1 + py-polars/src/polars/expr/expr.py | 11 ++++++++++- py-polars/src/polars/expr/list.py | 8 ++++++-- py-polars/src/polars/series/list.py | 14 ++++++++++++-- 6 files changed, 31 insertions(+), 5 deletions(-) diff --git a/py-polars/docs/source/reference/expressions/list.rst b/py-polars/docs/source/reference/expressions/list.rst index d0889614cf3c..bd5e2645a4e3 100644 --- a/py-polars/docs/source/reference/expressions/list.rst +++ b/py-polars/docs/source/reference/expressions/list.rst @@ -26,6 +26,7 @@ The following methods are available under the `expr.list` attribute. Expr.list.gather_every Expr.list.get Expr.list.head + Expr.list.item Expr.list.join Expr.list.last Expr.list.len diff --git a/py-polars/docs/source/reference/expressions/modify_select.rst b/py-polars/docs/source/reference/expressions/modify_select.rst index 73b9aaee9b5f..0542f17a344d 100644 --- a/py-polars/docs/source/reference/expressions/modify_select.rst +++ b/py-polars/docs/source/reference/expressions/modify_select.rst @@ -33,6 +33,7 @@ Manipulation/selection Expr.inspect Expr.interpolate Expr.interpolate_by + Expr.item Expr.limit Expr.lower_bound Expr.pipe diff --git a/py-polars/docs/source/reference/series/list.rst b/py-polars/docs/source/reference/series/list.rst index d51fb1470bb2..b5dffa265603 100644 --- a/py-polars/docs/source/reference/series/list.rst +++ b/py-polars/docs/source/reference/series/list.rst @@ -26,6 +26,7 @@ The following methods are available under the `Series.list` attribute. Series.list.gather_every Series.list.get Series.list.head + Series.list.item Series.list.join Series.list.last Series.list.len diff --git a/py-polars/src/polars/expr/expr.py b/py-polars/src/polars/expr/expr.py index 522f344adfda..a4ddc2b82f9a 100644 --- a/py-polars/src/polars/expr/expr.py +++ b/py-polars/src/polars/expr/expr.py @@ -3450,6 +3450,10 @@ def item(self) -> Expr: This raises an error if there is not exactly one value. 
+ See Also + -------- + :meth:`Expr.get` : Get a single value by index. + Examples -------- >>> df = pl.DataFrame({"a": [1]}) @@ -3462,7 +3466,12 @@ def item(self) -> Expr: ╞═════╡ │ 1 │ └─────┘ - """ + >>> df = pl.DataFrame({"a": [1, 2, 3]}) + >>> df.select(pl.col("a").item()) + Traceback (most recent call last): + ... + polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 3 values + """ # noqa: W505 return wrap_expr(self._pyexpr.item()) def over( diff --git a/py-polars/src/polars/expr/list.py b/py-polars/src/polars/expr/list.py index 0e16aa79be35..5391d8b5ecbb 100644 --- a/py-polars/src/polars/expr/list.py +++ b/py-polars/src/polars/expr/list.py @@ -691,6 +691,10 @@ def item(self) -> Expr: This errors if the sublist length is not exactly one. + See Also + -------- + :meth:`Expr.list.get` : Get the value by index in the sublists. + Examples -------- >>> df = pl.DataFrame({"a": [[3], [1], [2]]}) @@ -705,11 +709,11 @@ def item(self) -> Expr: │ [1] ┆ 1 │ │ [2] ┆ 2 │ └───────────┴──────┘ - >>> df = pl.DataFrame({"a": [[3, 2], [1], [2]]}) + >>> df = pl.DataFrame({"a": [[3, 2, 1], [1], [2]]}) >>> df.select(pl.col("a").list.item()) Traceback (most recent call last): ... - polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 2 values + polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 3 values """ # noqa: W505 return self.agg(F.element().item()) diff --git a/py-polars/src/polars/series/list.py b/py-polars/src/polars/series/list.py index d0e573dc10d5..a4ad46c47d37 100644 --- a/py-polars/src/polars/series/list.py +++ b/py-polars/src/polars/series/list.py @@ -576,7 +576,11 @@ def item(self) -> Series: """ Get the single value of the sublists. - This errors if the sublist does not contain exactly one element. + This errors if the sublist length is not exactly one. + + See Also + -------- + :meth:`Series.list.get` : Get the value by index in the sublists. 
Examples -------- @@ -589,7 +593,13 @@ def item(self) -> Series: 4 6 ] - """ + >>> df = pl.Series("a", [[3, 2, 1], [1], [2]]) + >>> df.list.item() + Traceback (most recent call last): + ... + polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 3 values + """ # noqa: W505 + return self.agg(F.element().item()) def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Series: """ From 43f84a9e7440cf4036dc013518219b6ff8d9504d Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 10:05:03 +0200 Subject: [PATCH 19/24] Update DSL schema hashes --- crates/polars-plan/dsl-schema-hashes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-plan/dsl-schema-hashes.json b/crates/polars-plan/dsl-schema-hashes.json index 01bba80edbdd..1a0043211a25 100644 --- a/crates/polars-plan/dsl-schema-hashes.json +++ b/crates/polars-plan/dsl-schema-hashes.json @@ -1,5 +1,5 @@ { - "AggExpr": "b1952c241a576472f6f9d93395eb558ccda487ce2d39a9fa0d5174adbaa51763", + "AggExpr": "2bdb1e6f50f333246ea8eb2d2139a2fe8f9b4b638160331c3f28fac186471544", "AnonymousColumnsUdf": "04e8b658fac4f09f7f9607c73be6fd3fe258064dd33468710f2c3e188c281a69", "AnyValue": "ef2b7f7588918138f192b3545a8474915a90d211b7c786e642427b5cd565d4ef", "ArrayDataTypeFunction": "f6606e9a91efce34563b32adb32473cd19d8c1e9b184b102be72268d14306136", From 8c27aa816ca882463408833278f6a5872e66a2fd Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 12:56:47 +0200 Subject: [PATCH 20/24] Update 2 small comments --- .../tests/unit/operations/aggregation/test_aggregations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 8a8f6b64568b..877c371fa998 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py 
@@ -948,7 +948,7 @@ def test_invalid_agg_dtypes_should_raise( min_size=1, max_size=1, excluded_dtypes=[ - # TODO(amber): This is broken, but also for .first() + # TODO: polars/#24936 pl.Struct, ], ) @@ -985,7 +985,7 @@ def test_item_too_many(df: pl.DataFrame) -> None: max_size=1, allow_null=False, excluded_dtypes=[ - # TODO(amber): This is broken, but also for .first() + # TODO: polars/#24936 pl.Struct, ], ) From 43ed4af6d7b028c9d5ec3a3058be0d8d78b1e1f3 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 13:58:06 +0200 Subject: [PATCH 21/24] tiny tweak --- py-polars/tests/unit/operations/namespaces/list/test_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py index 1dc723d33489..3c9e5fe2f7be 100644 --- a/py-polars/tests/unit/operations/namespaces/list/test_list.py +++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py @@ -49,7 +49,7 @@ def test_list_arr_get() -> None: expected_df = pl.Series("a", [None, None, None], dtype=pl.Int64).to_frame() assert_frame_equal(out_df, expected_df) - # Single + # item() a = pl.Series("a", [[1], [4], [6]]) expected = pl.Series("a", [1, 4, 6]) out = a.list.item() From 633631645e8bd909fe6126fdcfe938d8039365ea Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 16:22:57 +0200 Subject: [PATCH 22/24] Fix PR comments --- crates/polars-error/src/lib.rs | 13 +++++++++++ .../src/expressions/aggregation.rs | 13 ++--------- crates/polars-expr/src/reduce/first_last.rs | 23 +++---------------- py-polars/src/polars/series/list.py | 1 - 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/crates/polars-error/src/lib.rs b/crates/polars-error/src/lib.rs index 669641498986..f7dad36ada7e 100644 --- a/crates/polars-error/src/lib.rs +++ b/crates/polars-error/src/lib.rs @@ -504,6 +504,19 @@ on startup."#.trim_start()) ComputeError: "`strptime` / 
`to_datetime` was called with no format and no time zone, but a time zone is part of the data.\n\nThis was previously allowed but led to unpredictable and erroneous results. Give a format string, set a time zone or perform the operation eagerly on a Series instead of on an Expr." ) }; + (item_agg_count_not_one = $n:expr) => { + if $n == 0 { + polars_err!(ComputeError: + "aggregation 'item' expected a single value, got none" + ) + } else if $n > 1 { + polars_err!(ComputeError: + "aggregation 'item' expected a single value, got {} values", $n + ) + } else { + unreachable!() + } + }; } #[macro_export] diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 87aa04264396..03b86f787c1f 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -126,17 +126,8 @@ impl PhysicalExpr for AggregationExpr { s.tail(Some(1)) }), GroupByMethod::Item => Ok(match s.len() { - 0 => { - return Err(polars_err!(ComputeError: - "aggregation 'item' expected a single value, got none" - )); - }, - 1 => s.slice(0, 1), - n => { - return Err(polars_err!(ComputeError: - "aggregation 'item' expected a single value, got {n} values" - )); - }, + 1 => s, + n => polars_bail!(item_agg_count_not_one = n), }), GroupByMethod::Sum => parallel_op_columns( |s| s.sum_reduce().map(|sc| sc.into_column(s.name().clone())), diff --git a/crates/polars-expr/src/reduce/first_last.rs b/crates/polars-expr/src/reduce/first_last.rs index 277481dda1ed..ae781738547e 100644 --- a/crates/polars-expr/src/reduce/first_last.rs +++ b/crates/polars-expr/src/reduce/first_last.rs @@ -433,9 +433,7 @@ impl GroupedReduction for GenericFirstLastGroupedReduction< self.seqs.clear(); if P::is_item_policy() { for count in self.counts.iter() { - if *count != 1 { - return Err(item_count_err(*count)); - } + polars_ensure!(*count == 1, item_agg_count_not_one = *count); } } unsafe { @@ -454,22 +452,7 @@ impl 
GroupedReduction for GenericFirstLastGroupedReduction< fn check_item_count_is_one(v: &[Value]) -> PolarsResult<()> { if let Some(Value { count: n, .. }) = v.iter().find(|v| v.count != 1) { - Err(item_count_err(*n)) - } else { - Ok(()) - } -} - -fn item_count_err(n: u64) -> PolarsError { - if n == 0 { - polars_err!(ComputeError: - "aggregation 'item' expected a single value, got none" - ) - } else if n > 1 { - polars_err!(ComputeError: - "aggregation 'item' expected a single value, got {n} values" - ) - } else { - unreachable!() + polars_bail!(item_agg_count_not_one = *n); } + Ok(()) } diff --git a/py-polars/src/polars/series/list.py b/py-polars/src/polars/series/list.py index a4ad46c47d37..425fffe9c370 100644 --- a/py-polars/src/polars/series/list.py +++ b/py-polars/src/polars/series/list.py @@ -599,7 +599,6 @@ def item(self) -> Series: ... polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 3 values """ # noqa: W505 - return self.agg(F.element().item()) def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Series: """ From a12276cc15fdc4ed7ae5e4448101565a44436060 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 16:40:36 +0200 Subject: [PATCH 23/24] Forgot to update one Error construction --- .../polars-expr/src/expressions/aggregation.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 03b86f787c1f..a0a8aebc3629 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -339,19 +339,11 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::Item => { let (s, groups) = ac.get_final_aggregation(); for gc in groups.group_count().iter() { - if let Some(n) = gc - && n == 0 - { - return Err(polars_err!(ComputeError: - "aggregation 'item' expected a single value, got none" - )); - } - if let Some(n) = gc 
- && n > 1 - { - return Err(polars_err!(ComputeError: - "aggregation 'item' expected a single value, got {n} values" - )); + match gc { + None | Some(1) => continue, + Some(n) => { + polars_bail!(item_agg_count_not_one = n); + }, } } let agg_s = s.agg_first(&groups); From 2d0f941967ad6c7e1e1f98a608f15e3796328854 Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 20 Oct 2025 17:02:44 +0200 Subject: [PATCH 24/24] PR comment --- py-polars/src/polars/_plr.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/py-polars/src/polars/_plr.pyi b/py-polars/src/polars/_plr.pyi index b76212fc47c9..ccd2975702a0 100644 --- a/py-polars/src/polars/_plr.pyi +++ b/py-polars/src/polars/_plr.pyi @@ -1551,7 +1551,6 @@ class PyExpr: def list_agg(self, expr: PyExpr) -> PyExpr: ... def list_filter(self, predicate: PyExpr) -> PyExpr: ... def list_get(self, index: PyExpr, null_on_oob: bool) -> PyExpr: ... - def list_single(self) -> PyExpr: ... def list_join(self, separator: PyExpr, ignore_nulls: bool) -> PyExpr: ... def list_len(self) -> PyExpr: ... def list_max(self) -> PyExpr: ...