RunDevelopment
diff --git a/‎benches/encode.rs‎
Lines changed: 8 additions & 6 deletions b/‎benches/encode.rs‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎src/color/mod.rs‎
Lines changed: 1 addition & 0 deletions b/‎src/color/mod.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/encode/bc1.rs‎
Lines changed: 10 additions & 149 deletions b/‎src/encode/bc1.rs‎
Lines changed: 10 additions & 149 deletions
diff --git a/‎src/encode/bc4.rs‎
Lines changed: 15 additions & 55 deletions b/‎src/encode/bc4.rs‎
Lines changed: 15 additions & 55 deletions
@@ -1,3 +1,5 @@
+#![allow(unused)]
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dds::{header::*, *};
 use rand::Rng;
@@ -44,7 +46,7 @@ impl<T: 'static> Image<T> {
 trait ImageAsBytes {
     fn color(&self) -> ColorFormat;
     fn as_bytes(&self) -> &[u8];
-    fn view(&self) -> ImageView;
+    fn view(&self) -> ImageView<'_>;
 }
 impl ImageAsBytes for Image<u8> {
     fn color(&self) -> ColorFormat {
@@ -53,7 +55,7 @@ impl ImageAsBytes for Image<u8> {
     fn as_bytes(&self) -> &[u8] {
         &self.data
     }
-    fn view(&self) -> ImageView {
+    fn view(&self) -> ImageView<'_> {
         ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()
     }
 }
@@ -64,7 +66,7 @@ impl ImageAsBytes for Image<u16> {
     fn as_bytes(&self) -> &[u8] {
         zerocopy::IntoBytes::as_bytes(self.data.as_slice())
     }
-    fn view(&self) -> ImageView {
+    fn view(&self) -> ImageView<'_> {
         ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()
     }
 }
@@ -75,7 +77,7 @@ impl ImageAsBytes for Image<f32> {
     fn as_bytes(&self) -> &[u8] {
         zerocopy::IntoBytes::as_bytes(self.data.as_slice())
     }
-    fn view(&self) -> ImageView {
+    fn view(&self) -> ImageView<'_> {
         ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()
     }
 }
@@ -171,7 +173,7 @@ pub fn encode_compressed(c: &mut Criterion) {
 
     // options
     let mut base = EncodeOptions::default();
-    base.parallel = true; // disable/enable parallel for benchmarking
+    base.parallel = false; // disable/enable parallel for benchmarking
 
     let mut fast = base.clone();
     fast.quality = CompressionQuality::Fast;
@@ -296,7 +298,7 @@ criterion_group!(
     benches,
     // encode_uncompressed,
     encode_compressed,
-    encode_parallel,
+    // encode_parallel,
     // generate_mipmaps
 );
 criterion_main!(benches);
@@ -187,6 +187,7 @@ impl ColorFormatSet {
     pub const fn is_all(self) -> bool {
         self.data == Self::ALL.data
     }
+    #[allow(dead_code)] // this is only used in debug_assertions
     pub const fn len(self) -> u8 {
         self.data.count_ones() as u8
     }
 
@@ -191,7 +191,7 @@ fn refine_along_line(
     let options = bcn_util::RefinementOptions {
         step_initial: 0.2,
         step_decay: 0.5,
-        step_min: 0.005 / min.0.distance(max.0),
+        step_min: 0.005 / min.0.distance(max.0).max(0.0001),
         max_iter: options.refine_line_max_iter as u32,
     };
 
@@ -208,9 +208,12 @@ fn refine(
     options: Bc1Options,
     compute_error: impl Fn((ColorSpace, ColorSpace)) -> f32,
 ) -> (ColorSpace, ColorSpace) {
-    let min_max_dist = min.0.distance(max.0);
-    let max_iter = options.refine_max_iter as u32;
-    let refine_options = bcn_util::RefinementOptions::new_bc1(min_max_dist, max_iter);
+    let refine_options = bcn_util::RefinementOptions {
+        step_initial: 0.5 * min.0.distance(max.0),
+        step_decay: 0.5,
+        step_min: 1. / 64.,
+        max_iter: options.refine_max_iter as u32,
+    };
 
     bcn_util::refine_endpoints(min, max, refine_options, compute_error)
 }
@@ -271,28 +274,18 @@ fn fit_optimal_endpoints(
         .create_palette(&endpoints)
         .block_closest(block, alpha_map);
 
-    let optimal = if palette_info.mode == PaletteMode::P4 {
+    let optimal: (Vec3A, Vec3A) = if palette_info.mode == PaletteMode::P4 {
         debug_assert!(alpha_map == AlphaMap::ALL_OPAQUE);
 
-        if index_list.is_constant() {
-            // it's not possible to fit endpoints if all indices are the same
-            return (min, max);
-        }
-
         let mut weights = [0.0; 16];
         for i in 0..16 {
             let index = index_list.get(i);
             const WEIGHTS: [f32; 4] = [0.0, 1.0, 1.0 / 3.0, 2.0 / 3.0];
             weights[i] = WEIGHTS[index as usize];
         }
 
-        optimal_endpoints_by_weights(block, &weights, |c| c.0)
+        bcn_util::least_squares_weights(block, &weights)
     } else {
-        if index_list.is_constant_ignoring_transparent() {
-            // it's not possible to fit endpoints if all indices are the same
-            return (min, max);
-        }
-
         let mut colors = [Vec3A::ZERO; 16];
         let mut weights = [0.0; 16];
         let mut len = 0;
@@ -310,67 +303,15 @@ fn fit_optimal_endpoints(
             len += 1;
         }
         debug_assert!(len >= 2);
-        debug_assert!(
-            weights[..len].iter().any(|&w| w != weights[0]),
-            "weights cannot be all the same"
-        );
 
-        optimal_endpoints_by_weights(&colors[..len], &weights[..len], |c| *c)
+        bcn_util::least_squares_weights(&colors[..len], &weights[..len])
     };
 
     (
         ColorSpace(optimal.0.clamp(Vec3A::ZERO, Vec3A::ONE)),
         ColorSpace(optimal.1.clamp(Vec3A::ZERO, Vec3A::ONE)),
     )
 }
-/// https://fgiesen.wordpress.com/2024/08/29/when-is-a-bcn-astc-endpoints-from-indices-solve-singular/
-fn optimal_endpoints_by_weights<T>(
-    colors: &[T],
-    weights: &[f32],
-    unwrap: impl Fn(&T) -> Vec3A,
-) -> (Vec3A, Vec3A) {
-    assert_eq!(weights.len(), colors.len());
-
-    // Let A be a n-by-2 matrix where each row is [w_i, 1 - w_i].
-    // First, compute D = A^T*A = (a b)
-    //                            (b c)
-    let (mut a, mut b, mut c) = (0.0f32, 0.0f32, 0.0f32);
-    for &w in weights {
-        let w_inv = 1.0 - w;
-        a += w * w;
-        b += w * w_inv;
-        c += w_inv * w_inv;
-    }
-
-    // Second, find D^-1
-    let d_det = a * c - b * b;
-    debug_assert!(
-        d_det.abs() >= f32::EPSILON,
-        "All weights are the same, which is not allowed"
-    );
-    // E = D^-1 = ( c/det  -b/det)
-    //            (-b/det   a/det)
-    let d_det_rep = 1.0 / d_det;
-    let (e00, e01, e11) = (c * d_det_rep, -b * d_det_rep, a * d_det_rep);
-
-    // Let B be an n-by-3 matrix where each row is the color vector.
-    // Let X be the 2-by-3 matrix of the two endpoints we want to find.
-    // Third, compute X = (E * A^T) * B
-    let (mut x0, mut x1) = (Vec3A::ZERO, Vec3A::ZERO);
-    for (color, &w) in colors.iter().map(unwrap).zip(weights) {
-        // Let G = E * A^T be a 2-by-n matrix where each column is:
-        //   ( g_0i ) = ( e00 * w_i + e01 * (1 - w_i) )
-        //   ( g_1i ) = ( e01 * w_i + e11 * (1 - w_i) )
-        // TODO: This can be a single Vec3A FMA operation
-        let g0 = e00 * w + e01 * (1.0 - w); // = e01 + (e00 - e01) * w
-        let g1 = e01 * w + e11 * (1.0 - w); // = e11 + (e01 - e11) * w
-
-        x0 += color * g0;
-        x1 += color * g1;
-    }
-
-    (x0, x1)
-}
 
 fn get_single_color(block: &[Vec3A; 16], alpha_map: AlphaMap) -> Option<Vec3A> {
     if block.is_empty() {
@@ -1010,86 +951,6 @@ impl IndexList {
         debug_assert!(self.get(index) == 0, "Cannot set an index twice.");
         self.data |= (value as u32) << (index * 2);
     }
-
-    const fn constant(value: u8) -> Self {
-        debug_assert!(value < 4);
-        Self {
-            data: 0x5555_5555 * (value as u32),
-        }
-    }
-    /// Returns whether all indexes are the same.
-    fn is_constant(&self) -> bool {
-        const C0: u32 = IndexList::constant(0).data;
-        const C1: u32 = IndexList::constant(1).data;
-        const C2: u32 = IndexList::constant(2).data;
-        const C3: u32 = IndexList::constant(3).data;
-        let data = self.data;
-        data == C0 || data == C1 || data == C2 || data == C3
-    }
-    /// Returns whether all indexes are the same, ignoring indexes that are set
-    /// to transparent (3).
-    fn is_constant_ignoring_transparent(&self) -> bool {
-        const C0: u32 = IndexList::constant(0).data;
-        const C1: u32 = IndexList::constant(1).data;
-        const C2: u32 = IndexList::constant(2).data;
-        const TRANSPARENT: u32 = IndexList::constant(3).data;
-        const LOW_BIT: u32 = 0x5555_5555;
-        const HIGH_BIT: u32 = 0xAAAA_AAAA;
-        let data = self.data;
-        let opaque_bit_mask = data ^ TRANSPARENT;
-        // XOR is a bitwise !=
-        // so now we just have to make sure that at least one of the bits per
-        // index is != to TRANSPARENT
-        let opaque_low_bits = opaque_bit_mask & LOW_BIT;
-        let opaque_high_bits = (opaque_bit_mask & HIGH_BIT) >> 1;
-        let opaque_bits = opaque_low_bits | opaque_high_bits;
-        // For each index, opaque_bits has a 1 if the index is opaque, and a 0 if it is transparent.
-        // So now juts duplicate the bits to get a mask that is 11 for opaque indexes and 00 for transparent indexes
-        let opaque_bits = opaque_bits | (opaque_bits << 1);
-        data & opaque_bits == C0 & opaque_bits
-            || data & opaque_bits == C1 & opaque_bits
-            || data & opaque_bits == C2 & opaque_bits
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::IndexList;
-
-    #[test]
-    fn test_index_list_is_constant_ignoring_transparent() {
-        assert!(IndexList::constant(0).is_constant_ignoring_transparent());
-        assert!(IndexList::constant(1).is_constant_ignoring_transparent());
-        assert!(IndexList::constant(2).is_constant_ignoring_transparent());
-        assert!(IndexList::constant(3).is_constant_ignoring_transparent());
-
-        fn reference(indexes: &IndexList) -> bool {
-            if indexes.is_constant() {
-                return true;
-            }
-
-            // a bitset of all present index values
-            let mut present: u8 = 0;
-            for i in 0..16 {
-                present |= 1 << indexes.get(i);
-            }
-            present |= 1 << 3; // set transparent
-            present.count_ones() == 2
-        }
-
-        for constant in 0..4 {
-            let high = IndexList::constant(constant).data & !0xFFFF;
-            for low in 0..0x10000 {
-                let indexes = IndexList { data: high | low };
-                assert_eq!(
-                    indexes.is_constant_ignoring_transparent(),
-                    reference(&indexes),
-                    "Failed for indexes = {:#X}",
-                    indexes.data
-                );
-            }
-        }
-    }
 }
 
 #[derive(Clone, Copy, PartialEq, Eq)]
 
@@ -196,9 +196,19 @@ fn refine_endpoints(
             min,
             max,
             if options.fast_iter {
-                bcn_util::RefinementOptions::new_bc4_fast(min, max)
+                bcn_util::RefinementOptions {
+                    step_initial: 0.1 * (max - min),
+                    step_decay: 0.5,
+                    step_min: 1. / 255.,
+                    max_iter: 2,
+                }
             } else {
-                bcn_util::RefinementOptions::new_bc4(min, max)
+                bcn_util::RefinementOptions {
+                    step_initial: 0.15 * (max - min),
+                    step_decay: 0.5,
+                    step_min: 1. / 255. / 2.,
+                    max_iter: 10,
+                }
             },
             compute_error,
         );
@@ -246,11 +256,9 @@ fn compress_inter6(
 ) -> ([u8; 8], f32) {
     for _ in 0..2 {
         let weights = Inter6Palette::new(min, max).block_closest_weights(block);
-        if weights[0] == weights[1] && weights[1] == weights[2] && weights[2] == weights[3] {
-            // all weights are the same, so we cannot improve the endpoints
-            break;
-        }
-        (min, max) = optimal_endpoints_by_weights(&block.b, &weights);
+        (min, max) = bcn_util::least_squares_weights_f32_vec4(&block.b, &weights);
+        min = min.clamp(0.0, 1.0);
+        max = max.clamp(0.0, 1.0);
     }
 
     (min, max) = refine_endpoints(
@@ -302,54 +310,6 @@ fn compress_inter4(block: &Block, options: Bc4Options) -> ([u8; 8], f32) {
     (endpoints.with_indexes(indexes), error)
 }
 
-/// https://fgiesen.wordpress.com/2024/08/29/when-is-a-bcn-astc-endpoints-from-indices-solve-singular/
-fn optimal_endpoints_by_weights(colors: &[Vec4; 4], weights: &[Vec4; 4]) -> (f32, f32) {
-    // Let A be a n-by-2 matrix where each row is [w_i, 1 - w_i].
-    // First, compute D = A^T*A = (a b)
-    //                            (b c)
-    let [w0, w1, w2, w3] = *weights;
-    let [w0_, w1_, w2_, w3_] = [1.0 - w0, 1.0 - w1, 1.0 - w2, 1.0 - w3];
-    let a = w0 * w0 + w1 * w1 + w2 * w2 + w3 * w3;
-    let b = w0 * w0_ + w1 * w1_ + w2 * w2_ + w3 * w3_;
-    let c = w0_ * w0_ + w1_ * w1_ + w2_ * w2_ + w3_ * w3_;
-    let a = (a.x + a.y) + (a.z + a.w);
-    let b = (b.x + b.y) + (b.z + b.w);
-    let c = (c.x + c.y) + (c.z + c.w);
-
-    // Second, find D^-1
-    let d_det = a * c - b * b;
-    debug_assert!(
-        d_det.abs() >= f32::EPSILON,
-        "All weights are the same, which is not allowed"
-    );
-    // E = D^-1 = ( c/det  -b/det)
-    //            (-b/det   a/det)
-    let d_det_rep = 1.0 / d_det;
-    let (e00, e01, e11) = (c * d_det_rep, -b * d_det_rep, a * d_det_rep);
-
-    // Let B be an n-by-1 matrix where each row is the color vector.
-    // Let X be the 2-by-1 matrix of the two endpoints we want to find.
-    // Third, compute X = (E * A^T) * B
-    // Let G = E * A^T be a 2-by-n matrix where each column is:
-    //   ( g_0i ) = ( e00 * w_i + e01 * (1 - w_i) ) = ( e01 + (e00 - e01) * w )
-    //   ( g_1i ) = ( e01 * w_i + e11 * (1 - w_i) ) = ( e11 + (e01 - e11) * w )
-    let e00_01 = e00 - e01;
-    let e01_11 = e01 - e11;
-    let [c0, c1, c2, c3] = *colors;
-    let x0 = (c0 * (e01 + e00_01 * w0))
-        + (c1 * (e01 + e00_01 * w1))
-        + (c2 * (e01 + e00_01 * w2))
-        + (c3 * (e01 + e00_01 * w3));
-    let x1 = (c0 * (e11 + e01_11 * w0))
-        + (c1 * (e11 + e01_11 * w1))
-        + (c2 * (e11 + e01_11 * w2))
-        + (c3 * (e11 + e01_11 * w3));
-    let x0 = (x0.x + x0.y) + (x0.z + x0.w);
-    let x1 = (x1.x + x1.y) + (x1.z + x1.w);
-
-    (x0.clamp(0.0, 1.0), x1.clamp(0.0, 1.0))
-}
-
 struct EndPoints {
     c0: u8,
     c1: u8,
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+#![allow(unused)]`
	`2`	`+`
`1`	`3`	`use criterion::{black_box, criterion_group, criterion_main, Criterion};`
`2`	`4`	`use dds::{header::*, *};`
`3`	`5`	`use rand::Rng;`
`@@ -44,7 +46,7 @@ impl<T: 'static> Image<T> {`
`44`	`46`	`trait ImageAsBytes {`
`45`	`47`	`fn color(&self) -> ColorFormat;`
`46`	`48`	`fn as_bytes(&self) -> &[u8];`
`47`		`- fn view(&self) -> ImageView;`
	`49`	`+ fn view(&self) -> ImageView<'_>;`
`48`	`50`	`}`
`49`	`51`	`impl ImageAsBytes for Image<u8> {`
`50`	`52`	`fn color(&self) -> ColorFormat {`
`@@ -53,7 +55,7 @@ impl ImageAsBytes for Image<u8> {`
`53`	`55`	`fn as_bytes(&self) -> &[u8] {`
`54`	`56`	`&self.data`
`55`	`57`	`}`
`56`		`- fn view(&self) -> ImageView {`
	`58`	`+ fn view(&self) -> ImageView<'_> {`
`57`	`59`	`ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()`
`58`	`60`	`}`
`59`	`61`	`}`
`@@ -64,7 +66,7 @@ impl ImageAsBytes for Image<u16> {`
`64`	`66`	`fn as_bytes(&self) -> &[u8] {`
`65`	`67`	`zerocopy::IntoBytes::as_bytes(self.data.as_slice())`
`66`	`68`	`}`
`67`		`- fn view(&self) -> ImageView {`
	`69`	`+ fn view(&self) -> ImageView<'_> {`
`68`	`70`	`ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()`
`69`	`71`	`}`
`70`	`72`	`}`
`@@ -75,7 +77,7 @@ impl ImageAsBytes for Image<f32> {`
`75`	`77`	`fn as_bytes(&self) -> &[u8] {`
`76`	`78`	`zerocopy::IntoBytes::as_bytes(self.data.as_slice())`
`77`	`79`	`}`
`78`		`- fn view(&self) -> ImageView {`
	`80`	`+ fn view(&self) -> ImageView<'_> {`
`79`	`81`	`ImageView::new(self.as_bytes(), self.size, self.color()).unwrap()`
`80`	`82`	`}`
`81`	`83`	`}`
`@@ -171,7 +173,7 @@ pub fn encode_compressed(c: &mut Criterion) {`
`171`	`173`
`172`	`174`	`// options`
`173`	`175`	`let mut base = EncodeOptions::default();`
`174`		`- base.parallel = true; // disable/enable parallel for benchmarking`
	`176`	`+ base.parallel = false; // disable/enable parallel for benchmarking`
`175`	`177`
`176`	`178`	`let mut fast = base.clone();`
`177`	`179`	`fast.quality = CompressionQuality::Fast;`
`@@ -296,7 +298,7 @@ criterion_group!(`
`296`	`298`	`benches,`
`297`	`299`	`// encode_uncompressed,`
`298`	`300`	`encode_compressed,`
`299`		`- encode_parallel,`
	`301`	`+ // encode_parallel,`
`300`	`302`	`// generate_mipmaps`
`301`	`303`	`);`
`302`	`304`	`criterion_main!(benches);`
Original file line number	Diff line number	Diff line change
`@@ -187,6 +187,7 @@ impl ColorFormatSet {`
`187`	`187`	`pub const fn is_all(self) -> bool {`
`188`	`188`	`self.data == Self::ALL.data`
`189`	`189`	`}`
	`190`	`+ #[allow(dead_code)] // this is only used in debug_assertions`
`190`	`191`	`pub const fn len(self) -> u8 {`
`191`	`192`	`self.data.count_ones() as u8`
`192`	`193`	`}`