Skip to content

Commit af22336

Browse files
rluvatonalamb
andauthored
perf: improve performance of vectorized_equal_to for PrimitiveGroupValueBuilder in multi group by aggregation (#17977)
## Which issue does this PR close? N/A ## Rationale for this change Making multi column aggregation even faster ## What changes are included in this PR? In `PrimitiveGroupValueBuilder.vectorized_equal_to` always evaluate and use unchecked as both of these changes are what making the code compile to SIMD. ## Are these changes tested? Existing tests ## Are there any user-facing changes? Nope ----- I tried a LOT of variations [GodBolt](https://godbolt.org/z/Kc8ze6E9n) from splitting to fixed size chunks and trying to get auto-vectorization to use gather and creating bitmask to even testing portable SIMD (just to see what it will generate). this version only optimize the non null path for the moment as it is the easiest. once and if we change from `&mut [bool]` to mutable packed bits we could: 1. evaluate in chunks of `64` items (I tried different variations to see what is the best - you can tweak in the godbolt above with different type and size to check for yourself), 64 is not necessarily the best but it will be the fastest I think for doing AND with the `equal_to_results` boolean buffer 2. add optimization for nullable as well by just doing bitwise operation at 64 items at a time and avoid the cost of getting each bit manually 3. skip 64 items right away if the the `equal_to_results` equal to `0x00` (i.e. all false) --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 8b8bdc4 commit af22336

File tree

2 files changed

+96
-26
lines changed

2 files changed

+96
-26
lines changed

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs

Lines changed: 88 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,85 @@ where
5656
nulls: MaybeNullBufferBuilder::new(),
5757
}
5858
}
59+
60+
fn vectorized_equal_to_non_nullable(
61+
&self,
62+
lhs_rows: &[usize],
63+
array: &ArrayRef,
64+
rhs_rows: &[usize],
65+
equal_to_results: &mut [bool],
66+
) {
67+
assert!(
68+
!NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()),
69+
"called with nullable input"
70+
);
71+
let array_values = array.as_primitive::<T>().values();
72+
73+
let iter = izip!(
74+
lhs_rows.iter(),
75+
rhs_rows.iter(),
76+
equal_to_results.iter_mut(),
77+
);
78+
79+
for (&lhs_row, &rhs_row, equal_to_result) in iter {
80+
let result = {
81+
// Getting unchecked not only for bound checks but because the bound checks are
82+
// what prevents auto-vectorization
83+
let left = if cfg!(debug_assertions) {
84+
self.group_values[lhs_row]
85+
} else {
86+
// SAFETY: indices are guaranteed to be in bounds
87+
unsafe { *self.group_values.get_unchecked(lhs_row) }
88+
};
89+
let right = if cfg!(debug_assertions) {
90+
array_values[rhs_row]
91+
} else {
92+
// SAFETY: indices are guaranteed to be in bounds
93+
unsafe { *array_values.get_unchecked(rhs_row) }
94+
};
95+
96+
// Always evaluate, to allow for auto-vectorization
97+
left.is_eq(right)
98+
};
99+
100+
*equal_to_result = result && *equal_to_result;
101+
}
102+
}
103+
104+
pub fn vectorized_equal_nullable(
105+
&self,
106+
lhs_rows: &[usize],
107+
array: &ArrayRef,
108+
rhs_rows: &[usize],
109+
equal_to_results: &mut [bool],
110+
) {
111+
assert!(NULLABLE, "called with non-nullable input");
112+
let array = array.as_primitive::<T>();
113+
114+
let iter = izip!(
115+
lhs_rows.iter(),
116+
rhs_rows.iter(),
117+
equal_to_results.iter_mut(),
118+
);
119+
120+
for (&lhs_row, &rhs_row, equal_to_result) in iter {
121+
// Has found not equal to in previous column, don't need to check
122+
if !*equal_to_result {
123+
continue;
124+
}
125+
126+
// Perf: skip null check (by short circuit) if input is not nullable
127+
let exist_null = self.nulls.is_null(lhs_row);
128+
let input_null = array.is_null(rhs_row);
129+
if let Some(result) = nulls_equal_to(exist_null, input_null) {
130+
*equal_to_result = result;
131+
continue;
132+
}
133+
134+
// Otherwise, we need to check their values
135+
*equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row));
136+
}
137+
}
59138
}
60139

61140
impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
@@ -99,32 +178,15 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
99178
rhs_rows: &[usize],
100179
equal_to_results: &mut [bool],
101180
) {
102-
let array = array.as_primitive::<T>();
103-
104-
let iter = izip!(
105-
lhs_rows.iter(),
106-
rhs_rows.iter(),
107-
equal_to_results.iter_mut(),
108-
);
109-
110-
for (&lhs_row, &rhs_row, equal_to_result) in iter {
111-
// Has found not equal to in previous column, don't need to check
112-
if !*equal_to_result {
113-
continue;
114-
}
115-
116-
// Perf: skip null check (by short circuit) if input is not nullable
117-
if NULLABLE {
118-
let exist_null = self.nulls.is_null(lhs_row);
119-
let input_null = array.is_null(rhs_row);
120-
if let Some(result) = nulls_equal_to(exist_null, input_null) {
121-
*equal_to_result = result;
122-
continue;
123-
}
124-
// Otherwise, we need to check their values
125-
}
126-
127-
*equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row));
181+
if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) {
182+
self.vectorized_equal_to_non_nullable(
183+
lhs_rows,
184+
array,
185+
rhs_rows,
186+
equal_to_results,
187+
);
188+
} else {
189+
self.vectorized_equal_nullable(lhs_rows, array, rhs_rows, equal_to_results);
128190
}
129191
}
130192

datafusion/physical-plan/src/aggregates/group_values/null_builder.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,12 @@ impl MaybeNullBufferBuilder {
8989
new_builder.truncate(n);
9090
new_builder.finish()
9191
}
92+
93+
/// Returns true if this builder might have any nulls
94+
///
95+
/// This is guaranteed to be true if there are nulls
96+
/// but may be true even if there are no nulls
97+
pub(crate) fn might_have_nulls(&self) -> bool {
98+
self.nulls.as_slice().is_some()
99+
}
92100
}

0 commit comments

Comments
 (0)