@@ -175,24 +175,16 @@ static void compute_lowest_and_highest_weight(
175175 // unrounded weights in a straightforward way.
176176 vfloat min_weight (FLT_MAX);
177177 vfloat max_weight (-FLT_MAX);
178- unsigned int partial_weight_start = round_down_to_simd_multiple_vla (weight_count);
179- for (unsigned int i = 0 ; i < partial_weight_start; i += ASTCENC_SIMD_WIDTH)
180- {
181- vfloat weights = loada (dec_weight_ideal_value + i);
182- min_weight = min (min_weight, weights);
183- max_weight = max (max_weight, weights);
184- }
185178
186- if (partial_weight_start != weight_count)
179+ vint lane_id = vint::lane_id ();
180+ for (unsigned int i = 0 ; i < weight_count; i += ASTCENC_SIMD_WIDTH)
187181 {
188- vfloat partial_weights = loada (dec_weight_ideal_value + partial_weight_start);
189- vmask active = vint::lane_id () < vint (weight_count - partial_weight_start);
190-
191- vmask smaller = active & (partial_weights < min_weight);
192- min_weight = select (min_weight, partial_weights, smaller);
182+ vmask active = lane_id < vint (weight_count);
183+ lane_id += vint (ASTCENC_SIMD_WIDTH);
193184
194- vmask larger = active & (partial_weights > max_weight);
195- max_weight = select (max_weight, partial_weights, larger);
185+ vfloat weights = loada (dec_weight_ideal_value + i);
186+ min_weight = min (min_weight, select (min_weight, weights, active));
187+ max_weight = max (max_weight, select (max_weight, weights, active));
196188 }
197189
198190 min_weight = hmin (min_weight);
0 commit comments