Skip to content

Commit 3591a77

Browse files
BC1/4 refactoring (image-rs#85)
1 parent 7f6ab0e commit 3591a77

22 files changed

+280
-296
lines changed

benches/encode.rs

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
#![allow(unused)]
2+
13
use criterion::{black_box, criterion_group, criterion_main, Criterion};
24
use dds::{header::*, *};
35
use rand::Rng;
@@ -44,7 +46,7 @@ impl<T: 'static> Image<T> {
4446
trait ImageAsBytes {
4547
fn color(&self) -> ColorFormat;
4648
fn as_bytes(&self) -> &[u8];
47-
fn view(&self) -> ImageView;
49+
fn view(&self) -> ImageView<'_>;
4850
}
4951
impl ImageAsBytes for Image<u8> {
5052
fn color(&self) -> ColorFormat {
@@ -53,7 +55,7 @@ impl ImageAsBytes for Image<u8> {
5355
fn as_bytes(&self) -> &[u8] {
5456
&self.data
5557
}
56-
fn view(&self) -> ImageView {
58+
fn view(&self) -> ImageView<'_> {
5759
ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()
5860
}
5961
}
@@ -64,7 +66,7 @@ impl ImageAsBytes for Image<u16> {
6466
fn as_bytes(&self) -> &[u8] {
6567
zerocopy::IntoBytes::as_bytes(self.data.as_slice())
6668
}
67-
fn view(&self) -> ImageView {
69+
fn view(&self) -> ImageView<'_> {
6870
ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()
6971
}
7072
}
@@ -75,7 +77,7 @@ impl ImageAsBytes for Image<f32> {
7577
fn as_bytes(&self) -> &[u8] {
7678
zerocopy::IntoBytes::as_bytes(self.data.as_slice())
7779
}
78-
fn view(&self) -> ImageView {
80+
fn view(&self) -> ImageView<'_> {
7981
ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()
8082
}
8183
}
@@ -171,7 +173,7 @@ pub fn encode_compressed(c: &mut Criterion) {
171173

172174
// options
173175
let mut base = EncodeOptions::default();
174-
base.parallel = true; // disable/enable parallel for benchmarking
176+
base.parallel = false; // disable/enable parallel for benchmarking
175177

176178
let mut fast = base.clone();
177179
fast.quality = CompressionQuality::Fast;
@@ -296,7 +298,7 @@ criterion_group!(
296298
benches,
297299
// encode_uncompressed,
298300
encode_compressed,
299-
encode_parallel,
301+
// encode_parallel,
300302
// generate_mipmaps
301303
);
302304
criterion_main!(benches);

src/color/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,7 @@ impl ColorFormatSet {
187187
pub const fn is_all(self) -> bool {
188188
self.data == Self::ALL.data
189189
}
190+
#[allow(dead_code)] // this is only used in debug_assertions
190191
pub const fn len(self) -> u8 {
191192
self.data.count_ones() as u8
192193
}

src/encode/bc1.rs

Lines changed: 10 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@ fn refine_along_line(
191191
let options = bcn_util::RefinementOptions {
192192
step_initial: 0.2,
193193
step_decay: 0.5,
194-
step_min: 0.005 / min.0.distance(max.0),
194+
step_min: 0.005 / min.0.distance(max.0).max(0.0001),
195195
max_iter: options.refine_line_max_iter as u32,
196196
};
197197

@@ -208,9 +208,12 @@ fn refine(
208208
options: Bc1Options,
209209
compute_error: impl Fn((ColorSpace, ColorSpace)) -> f32,
210210
) -> (ColorSpace, ColorSpace) {
211-
let min_max_dist = min.0.distance(max.0);
212-
let max_iter = options.refine_max_iter as u32;
213-
let refine_options = bcn_util::RefinementOptions::new_bc1(min_max_dist, max_iter);
211+
let refine_options = bcn_util::RefinementOptions {
212+
step_initial: 0.5 * min.0.distance(max.0),
213+
step_decay: 0.5,
214+
step_min: 1. / 64.,
215+
max_iter: options.refine_max_iter as u32,
216+
};
214217

215218
bcn_util::refine_endpoints(min, max, refine_options, compute_error)
216219
}
@@ -271,28 +274,18 @@ fn fit_optimal_endpoints(
271274
.create_palette(&endpoints)
272275
.block_closest(block, alpha_map);
273276

274-
let optimal = if palette_info.mode == PaletteMode::P4 {
277+
let optimal: (Vec3A, Vec3A) = if palette_info.mode == PaletteMode::P4 {
275278
debug_assert!(alpha_map == AlphaMap::ALL_OPAQUE);
276279

277-
if index_list.is_constant() {
278-
// it's not possible to fit endpoints if all indices are the same
279-
return (min, max);
280-
}
281-
282280
let mut weights = [0.0; 16];
283281
for i in 0..16 {
284282
let index = index_list.get(i);
285283
const WEIGHTS: [f32; 4] = [0.0, 1.0, 1.0 / 3.0, 2.0 / 3.0];
286284
weights[i] = WEIGHTS[index as usize];
287285
}
288286

289-
optimal_endpoints_by_weights(block, &weights, |c| c.0)
287+
bcn_util::least_squares_weights(block, &weights)
290288
} else {
291-
if index_list.is_constant_ignoring_transparent() {
292-
// it's not possible to fit endpoints if all indices are the same
293-
return (min, max);
294-
}
295-
296289
let mut colors = [Vec3A::ZERO; 16];
297290
let mut weights = [0.0; 16];
298291
let mut len = 0;
@@ -310,67 +303,15 @@ fn fit_optimal_endpoints(
310303
len += 1;
311304
}
312305
debug_assert!(len >= 2);
313-
debug_assert!(
314-
weights[..len].iter().any(|&w| w != weights[0]),
315-
"weights cannot be all the same"
316-
);
317306

318-
optimal_endpoints_by_weights(&colors[..len], &weights[..len], |c| *c)
307+
bcn_util::least_squares_weights(&colors[..len], &weights[..len])
319308
};
320309

321310
(
322311
ColorSpace(optimal.0.clamp(Vec3A::ZERO, Vec3A::ONE)),
323312
ColorSpace(optimal.1.clamp(Vec3A::ZERO, Vec3A::ONE)),
324313
)
325314
}
326-
/// https://fgiesen.wordpress.com/2024/08/29/when-is-a-bcn-astc-endpoints-from-indices-solve-singular/
327-
fn optimal_endpoints_by_weights<T>(
328-
colors: &[T],
329-
weights: &[f32],
330-
unwrap: impl Fn(&T) -> Vec3A,
331-
) -> (Vec3A, Vec3A) {
332-
assert_eq!(weights.len(), colors.len());
333-
334-
// Let A be an n-by-2 matrix where each row is [w_i, 1 - w_i].
335-
// First, compute D = A^T*A = (a b)
336-
// (b c)
337-
let (mut a, mut b, mut c) = (0.0f32, 0.0f32, 0.0f32);
338-
for &w in weights {
339-
let w_inv = 1.0 - w;
340-
a += w * w;
341-
b += w * w_inv;
342-
c += w_inv * w_inv;
343-
}
344-
345-
// Second, find D^-1
346-
let d_det = a * c - b * b;
347-
debug_assert!(
348-
d_det.abs() >= f32::EPSILON,
349-
"All weights are the same, which is not allowed"
350-
);
351-
// E = D^-1 = ( c/det -b/det)
352-
// (-b/det a/det)
353-
let d_det_rep = 1.0 / d_det;
354-
let (e00, e01, e11) = (c * d_det_rep, -b * d_det_rep, a * d_det_rep);
355-
356-
// Let B be an n-by-3 matrix where each row is the color vector.
357-
// Let X be the 2-by-3 matrix of the two endpoints we want to find.
358-
// Third, compute X = (E * A^T) * B
359-
let (mut x0, mut x1) = (Vec3A::ZERO, Vec3A::ZERO);
360-
for (color, &w) in colors.iter().map(unwrap).zip(weights) {
361-
// Let G = E * A^T be a 2-by-n matrix where each column is:
362-
// ( g_0i ) = ( e00 * w_i + e01 * (1 - w_i) )
363-
// ( g_1i ) = ( e01 * w_i + e11 * (1 - w_i) )
364-
// TODO: This can be a single Vec3A FMA operation
365-
let g0 = e00 * w + e01 * (1.0 - w); // = e01 + (e00 - e01) * w
366-
let g1 = e01 * w + e11 * (1.0 - w); // = e11 + (e01 - e11) * w
367-
368-
x0 += color * g0;
369-
x1 += color * g1;
370-
}
371-
372-
(x0, x1)
373-
}
374315

375316
fn get_single_color(block: &[Vec3A; 16], alpha_map: AlphaMap) -> Option<Vec3A> {
376317
if block.is_empty() {
@@ -1010,86 +951,6 @@ impl IndexList {
1010951
debug_assert!(self.get(index) == 0, "Cannot set an index twice.");
1011952
self.data |= (value as u32) << (index * 2);
1012953
}
1013-
1014-
const fn constant(value: u8) -> Self {
1015-
debug_assert!(value < 4);
1016-
Self {
1017-
data: 0x5555_5555 * (value as u32),
1018-
}
1019-
}
1020-
/// Returns whether all indexes are the same.
1021-
fn is_constant(&self) -> bool {
1022-
const C0: u32 = IndexList::constant(0).data;
1023-
const C1: u32 = IndexList::constant(1).data;
1024-
const C2: u32 = IndexList::constant(2).data;
1025-
const C3: u32 = IndexList::constant(3).data;
1026-
let data = self.data;
1027-
data == C0 || data == C1 || data == C2 || data == C3
1028-
}
1029-
/// Returns whether all indexes are the same, ignoring indexes that are set
1030-
/// to transparent (3).
1031-
fn is_constant_ignoring_transparent(&self) -> bool {
1032-
const C0: u32 = IndexList::constant(0).data;
1033-
const C1: u32 = IndexList::constant(1).data;
1034-
const C2: u32 = IndexList::constant(2).data;
1035-
const TRANSPARENT: u32 = IndexList::constant(3).data;
1036-
const LOW_BIT: u32 = 0x5555_5555;
1037-
const HIGH_BIT: u32 = 0xAAAA_AAAA;
1038-
let data = self.data;
1039-
let opaque_bit_mask = data ^ TRANSPARENT;
1040-
// XOR is a bitwise !=
1041-
// so now we just have to make sure that at least one of the bits per
1042-
// index is != to TRANSPARENT
1043-
let opaque_low_bits = opaque_bit_mask & LOW_BIT;
1044-
let opaque_high_bits = (opaque_bit_mask & HIGH_BIT) >> 1;
1045-
let opaque_bits = opaque_low_bits | opaque_high_bits;
1046-
// For each index, opaque_bits has a 1 if the index is opaque, and a 0 if it is transparent.
1047-
// So now just duplicate the bits to get a mask that is 11 for opaque indexes and 00 for transparent indexes
1048-
let opaque_bits = opaque_bits | (opaque_bits << 1);
1049-
data & opaque_bits == C0 & opaque_bits
1050-
|| data & opaque_bits == C1 & opaque_bits
1051-
|| data & opaque_bits == C2 & opaque_bits
1052-
}
1053-
}
1054-
1055-
#[cfg(test)]
1056-
mod tests {
1057-
use super::IndexList;
1058-
1059-
#[test]
1060-
fn test_index_list_is_constant_ignoring_transparent() {
1061-
assert!(IndexList::constant(0).is_constant_ignoring_transparent());
1062-
assert!(IndexList::constant(1).is_constant_ignoring_transparent());
1063-
assert!(IndexList::constant(2).is_constant_ignoring_transparent());
1064-
assert!(IndexList::constant(3).is_constant_ignoring_transparent());
1065-
1066-
fn reference(indexes: &IndexList) -> bool {
1067-
if indexes.is_constant() {
1068-
return true;
1069-
}
1070-
1071-
// a bitset of all present index values
1072-
let mut present: u8 = 0;
1073-
for i in 0..16 {
1074-
present |= 1 << indexes.get(i);
1075-
}
1076-
present |= 1 << 3; // set transparent
1077-
present.count_ones() == 2
1078-
}
1079-
1080-
for constant in 0..4 {
1081-
let high = IndexList::constant(constant).data & !0xFFFF;
1082-
for low in 0..0x10000 {
1083-
let indexes = IndexList { data: high | low };
1084-
assert_eq!(
1085-
indexes.is_constant_ignoring_transparent(),
1086-
reference(&indexes),
1087-
"Failed for indexes = {:#X}",
1088-
indexes.data
1089-
);
1090-
}
1091-
}
1092-
}
1093954
}
1094955

1095956
#[derive(Clone, Copy, PartialEq, Eq)]

src/encode/bc4.rs

Lines changed: 15 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -196,9 +196,19 @@ fn refine_endpoints(
196196
min,
197197
max,
198198
if options.fast_iter {
199-
bcn_util::RefinementOptions::new_bc4_fast(min, max)
199+
bcn_util::RefinementOptions {
200+
step_initial: 0.1 * (max - min),
201+
step_decay: 0.5,
202+
step_min: 1. / 255.,
203+
max_iter: 2,
204+
}
200205
} else {
201-
bcn_util::RefinementOptions::new_bc4(min, max)
206+
bcn_util::RefinementOptions {
207+
step_initial: 0.15 * (max - min),
208+
step_decay: 0.5,
209+
step_min: 1. / 255. / 2.,
210+
max_iter: 10,
211+
}
202212
},
203213
compute_error,
204214
);
@@ -246,11 +256,9 @@ fn compress_inter6(
246256
) -> ([u8; 8], f32) {
247257
for _ in 0..2 {
248258
let weights = Inter6Palette::new(min, max).block_closest_weights(block);
249-
if weights[0] == weights[1] && weights[1] == weights[2] && weights[2] == weights[3] {
250-
// all weights are the same, so we cannot improve the endpoints
251-
break;
252-
}
253-
(min, max) = optimal_endpoints_by_weights(&block.b, &weights);
259+
(min, max) = bcn_util::least_squares_weights_f32_vec4(&block.b, &weights);
260+
min = min.clamp(0.0, 1.0);
261+
max = max.clamp(0.0, 1.0);
254262
}
255263

256264
(min, max) = refine_endpoints(
@@ -302,54 +310,6 @@ fn compress_inter4(block: &Block, options: Bc4Options) -> ([u8; 8], f32) {
302310
(endpoints.with_indexes(indexes), error)
303311
}
304312

305-
/// https://fgiesen.wordpress.com/2024/08/29/when-is-a-bcn-astc-endpoints-from-indices-solve-singular/
306-
fn optimal_endpoints_by_weights(colors: &[Vec4; 4], weights: &[Vec4; 4]) -> (f32, f32) {
307-
// Let A be an n-by-2 matrix where each row is [w_i, 1 - w_i].
308-
// First, compute D = A^T*A = (a b)
309-
// (b c)
310-
let [w0, w1, w2, w3] = *weights;
311-
let [w0_, w1_, w2_, w3_] = [1.0 - w0, 1.0 - w1, 1.0 - w2, 1.0 - w3];
312-
let a = w0 * w0 + w1 * w1 + w2 * w2 + w3 * w3;
313-
let b = w0 * w0_ + w1 * w1_ + w2 * w2_ + w3 * w3_;
314-
let c = w0_ * w0_ + w1_ * w1_ + w2_ * w2_ + w3_ * w3_;
315-
let a = (a.x + a.y) + (a.z + a.w);
316-
let b = (b.x + b.y) + (b.z + b.w);
317-
let c = (c.x + c.y) + (c.z + c.w);
318-
319-
// Second, find D^-1
320-
let d_det = a * c - b * b;
321-
debug_assert!(
322-
d_det.abs() >= f32::EPSILON,
323-
"All weights are the same, which is not allowed"
324-
);
325-
// E = D^-1 = ( c/det -b/det)
326-
// (-b/det a/det)
327-
let d_det_rep = 1.0 / d_det;
328-
let (e00, e01, e11) = (c * d_det_rep, -b * d_det_rep, a * d_det_rep);
329-
330-
// Let B be an n-by-1 matrix where each row is the color vector.
331-
// Let X be the 2-by-1 matrix of the two endpoints we want to find.
332-
// Third, compute X = (E * A^T) * B
333-
// Let G = E * A^T be a 2-by-n matrix where each column is:
334-
// ( g_0i ) = ( e00 * w_i + e01 * (1 - w_i) ) = ( e01 + (e00 - e01) * w )
335-
// ( g_1i ) = ( e01 * w_i + e11 * (1 - w_i) ) = ( e11 + (e01 - e11) * w )
336-
let e00_01 = e00 - e01;
337-
let e01_11 = e01 - e11;
338-
let [c0, c1, c2, c3] = *colors;
339-
let x0 = (c0 * (e01 + e00_01 * w0))
340-
+ (c1 * (e01 + e00_01 * w1))
341-
+ (c2 * (e01 + e00_01 * w2))
342-
+ (c3 * (e01 + e00_01 * w3));
343-
let x1 = (c0 * (e11 + e01_11 * w0))
344-
+ (c1 * (e11 + e01_11 * w1))
345-
+ (c2 * (e11 + e01_11 * w2))
346-
+ (c3 * (e11 + e01_11 * w3));
347-
let x0 = (x0.x + x0.y) + (x0.z + x0.w);
348-
let x1 = (x1.x + x1.y) + (x1.z + x1.w);
349-
350-
(x0.clamp(0.0, 1.0), x1.clamp(0.0, 1.0))
351-
}
352-
353313
struct EndPoints {
354314
c0: u8,
355315
c1: u8,

0 commit comments

Comments
 (0)