@@ -270,6 +270,8 @@ class JPEGEncodingContext {
270270 }
271271 }
272272 m_macroblocks.clear ();
273+
274+ TRY (find_optimal_huffman_tables ());
273275 return {};
274276 }
275277
@@ -337,14 +339,27 @@ class JPEGEncodingContext {
337339 return m_bit_stream.write_bits (symbol.word , symbol.code_length );
338340 }
339341
342+ ErrorOr<void > append_symbol (Symbol symbol)
343+ {
344+ auto & stat_table = [&]() -> auto & {
345+ if (symbol.component_id == 0 || symbol.component_id == 3 ) {
346+ return symbol.is_dc ? m_symbol_stats[0 ] : m_symbol_stats[1 ];
347+ }
348+ return symbol.is_dc ? m_symbol_stats[2 ] : m_symbol_stats[3 ];
349+ }();
350+ stat_table[symbol.byte ] += 1 ;
351+ TRY (m_symbols_and_bits.try_append (symbol));
352+ return {};
353+ }
354+
340355 ErrorOr<void > encode_dc (i16 const component[], u8 component_id)
341356 {
342357 // F.1.2.1.3 - Huffman encoding procedures for DC coefficients
343358 auto diff = component[0 ] - m_last_dc_values[component_id];
344359 m_last_dc_values[component_id] = component[0 ];
345360
346361 auto const size = csize (diff);
347- TRY (m_symbols_and_bits. try_append (Symbol { .byte = size, .component_id = component_id, .is_dc = true }));
362+ TRY (append_symbol ( { .byte = size, .component_id = component_id, .is_dc = true }));
348363
349364 if (diff < 0 )
350365 diff -= 1 ;
@@ -365,23 +380,23 @@ class JPEGEncodingContext {
365380 auto coefficient = component[zigzag_map[k]];
366381 if (coefficient == 0 ) {
367382 if (k == 63 ) {
368- TRY (m_symbols_and_bits. try_append (Symbol { .byte = 0x00 , .component_id = component_id, .is_dc = false }));
383+ TRY (append_symbol ( { .byte = 0x00 , .component_id = component_id, .is_dc = false }));
369384 break ;
370385 }
371386 r += 1 ;
372387 continue ;
373388 }
374389
375390 while (r > 15 ) {
376- TRY (m_symbols_and_bits. try_append (Symbol { .byte = 0xF0 , .component_id = component_id, .is_dc = false }));
391+ TRY (append_symbol ( { .byte = 0xF0 , .component_id = component_id, .is_dc = false }));
377392 r -= 16 ;
378393 }
379394
380395 {
381396 // F.3 - Sequential encoding of a non-zero AC coefficient
382397 auto const ssss = csize (coefficient);
383398 u8 const rs = (r << 4 ) + ssss;
384- TRY (m_symbols_and_bits. try_append (Symbol { .byte = rs, .component_id = component_id, .is_dc = false }));
399+ TRY (append_symbol ( { .byte = rs, .component_id = component_id, .is_dc = false }));
385400
386401 if (coefficient < 0 )
387402 coefficient -= 1 ;
@@ -414,6 +429,187 @@ class JPEGEncodingContext {
414429 return {};
415430 }
416431
432+ static void find_smallest_frequencies (Array<u32 , 257 > const & frequencies, u16 & v1, Optional<u16 >& v2)
433+ {
434+ // FIXME: A min-heap with a custom comparator should be able to do the trick.
435+
436+ // "The procedure “Find V1 for least value of FREQ(V1) > 0” always selects the value
437+ // with the largest value of V1 when more than one V1 with the same frequency occurs.
438+ // The reserved code point is then guaranteed to be in the longest code word category."
439+
440+ u16 index_min {};
441+ u16 second_index_min {};
442+ u32 freq_min = NumericLimits<u32 >::max ();
443+ u32 second_freq_min = NumericLimits<u32 >::max ();
444+
445+ for (auto [i, freq] : enumerate(frequencies)) {
446+ if (freq == 0 )
447+ continue ;
448+ if (freq <= freq_min) {
449+ second_index_min = index_min;
450+ second_freq_min = freq_min;
451+ index_min = i;
452+ freq_min = freq;
453+ } else if (freq <= second_freq_min) {
454+ second_index_min = i;
455+ second_freq_min = freq;
456+ }
457+ }
458+
459+ v1 = index_min;
460+ if (second_freq_min != NumericLimits<u32 >::max ())
461+ v2 = second_index_min;
462+ else
463+ v2.clear ();
464+ }
465+
466+ static Array<u8 , 257 > find_huffman_code_size (Array<u32 , 257 > frequencies)
467+ {
468+ // "Before starting the procedure, the values of FREQ are collected for V = 0 to 255
469+ // and the FREQ value for V = 256 is set to 1."
470+ frequencies[256 ] = 1 ;
471+
472+ // "the entries in CODESIZE are all set to 0"
473+ Array<u8 , 257 > code_size {};
474+
475+ // "the indices in OTHERS are set to –1"
476+ Array<i16 , 257 > others {};
477+ others.fill (-1 );
478+
479+ // Figure K.1 – Procedure to find Huffman code sizes
480+ while (true ) {
481+ u16 v1 {};
482+ Optional<u16 > maybe_v2 {};
483+ find_smallest_frequencies (frequencies, v1, maybe_v2);
484+ if (!maybe_v2.has_value ())
485+ break ;
486+
487+ auto v2 = maybe_v2.value ();
488+
489+ frequencies[v1] += frequencies[v2];
490+ frequencies[v2] = 0 ;
491+
492+ increment_v1_code_size:
493+ code_size[v1] += 1 ;
494+
495+ if (others[v1] != -1 ) {
496+ v1 = others[v1];
497+ goto increment_v1_code_size;
498+ }
499+
500+ others[v1] = v2;
501+
502+ increment_v2_code_size:
503+ code_size[v2] += 1 ;
504+ if (others[v2] != -1 ) {
505+ v2 = others[v2];
506+ goto increment_v2_code_size;
507+ }
508+ }
509+
510+ return code_size;
511+ }
512+
513+ static void adjust_bits (Array<u8 , 257 >& bits)
514+ {
515+ // Figure K.3 – Procedure for limiting code lengths to 16 bits
516+ u16 i = 32 ;
517+ while (true ) {
518+ if (bits[i] > 0 ) {
519+ auto j = i - 1 ;
520+ do {
521+ j--;
522+ } while (bits[j] == 0 );
523+
524+ bits[i] = bits[i] - 2 ;
525+ bits[i - 1 ] = bits[i - 1 ] + 1 ;
526+ bits[j + 1 ] = bits[j + 1 ] + 2 ;
527+ bits[j] = bits[j] - 1 ;
528+ } else {
529+ i -= 1 ;
530+ if (i != 16 )
531+ continue ;
532+
533+ while (bits[i] == 0 )
534+ --i;
535+ bits[i] -= 1 ;
536+ break ;
537+ }
538+ }
539+ }
540+
541+ static Array<u8 , 257 > count_bits (Array<u8 , 257 > const & code_size)
542+ {
543+ // "The count for each size is contained in the list, BITS. The counts in BITS are zero
544+ // at the start of the procedure."
545+ Array<u8 , 257 > bits {};
546+
547+ // Figure K.2 – Procedure to find the number of codes of each size
548+ for (u16 i = 0 ; i < 257 ; ++i) {
549+ if (code_size[i] == 0 )
550+ continue ;
551+ bits[code_size[i]] += 1 ;
552+ }
553+ adjust_bits (bits);
554+
555+ return bits;
556+ }
557+
558+ static Vector<u8 , 256 > sort_input (Array<u8 , 257 > const & code_size)
559+ {
560+ // "Figure K.4 – Sorting of input values according to code size"
561+ Vector<u8 , 256 > huffval {};
562+ for (u8 i = 1 ; i <= 32 ; ++i) {
563+ for (u16 j = 0 ; j <= 255 ; ++j) {
564+ if (code_size[j] == i)
565+ huffval.append (j);
566+ }
567+ }
568+ return huffval;
569+ }
570+
571+ static ErrorOr<OutputHuffmanTable> compute_optimal_table (Array<u32 , 257 > const & distribution)
572+ {
573+ // K.2 A procedure for generating the lists which specify a Huffman code table
574+
575+ auto code_size = find_huffman_code_size (distribution);
576+
577+ auto bits = count_bits (code_size);
578+
579+ // "The input values are sorted according to code size"
580+ auto huffval = sort_input (code_size);
581+
582+ // "At this point, the list of code lengths (BITS) and the list of values
583+ // (HUFFVAL) can be used to generate the code tables."
584+
585+ Vector<OutputHuffmanTable::Symbol, 16 > symbols;
586+ u16 code = 0 ;
587+ u32 symbol_index = 0 ;
588+ for (auto [encoded_size, number_of_codes] : enumerate(bits)) {
589+ for (u8 i = 0 ; i < number_of_codes; i++) {
590+ TRY (symbols.try_append ({ .input_byte = huffval[symbol_index], .code_length = static_cast <u8 >(encoded_size), .word = code }));
591+ code++;
592+ symbol_index++;
593+ }
594+ code <<= 1 ;
595+ }
596+
597+ return OutputHuffmanTable { move (symbols) };
598+ }
599+
600+ ErrorOr<void > find_optimal_huffman_tables ()
601+ {
602+ dc_luminance_huffman_table = TRY (compute_optimal_table (m_symbol_stats[0 ]));
603+ dc_luminance_huffman_table.id = (0 << 4 ) | 0 ;
604+ ac_luminance_huffman_table = TRY (compute_optimal_table (m_symbol_stats[1 ]));
605+ ac_luminance_huffman_table.id = (1 << 4 ) | 0 ;
606+ dc_chrominance_huffman_table = TRY (compute_optimal_table (m_symbol_stats[2 ]));
607+ dc_chrominance_huffman_table.id = (0 << 4 ) | 1 ;
608+ ac_chrominance_huffman_table = TRY (compute_optimal_table (m_symbol_stats[3 ]));
609+ ac_chrominance_huffman_table.id = (1 << 4 ) | 1 ;
610+ return {};
611+ }
612+
417613 static u8 csize (i16 coefficient)
418614 {
419615 VERIFY (coefficient >= -2047 && coefficient <= 2047 );
@@ -430,6 +626,7 @@ class JPEGEncodingContext {
430626 Vector<FloatMacroblock> m_macroblocks {};
431627 Array<i16 , 4 > m_last_dc_values {};
432628
629+ Array<Array<u32 , 257 >, 4 > m_symbol_stats {};
433630 Vector<SymbolOrRawBits> m_symbols_and_bits {};
434631
435632 JPEGBigEndianOutputBitStream m_bit_stream;
@@ -629,14 +826,8 @@ ErrorOr<void> add_scan_header(Stream& stream, Mode mode)
629826 return {};
630827}
631828
632- ErrorOr<void > add_headers (Stream& stream, JPEGEncodingContext& context, JPEGWriter::Options const & options, IntSize size, Mode mode)
829+ ErrorOr<void > add_headers (Stream& stream, JPEGEncodingContext const & context, JPEGWriter::Options const & options, IntSize size, Mode mode)
633830{
634- context.dc_luminance_huffman_table = s_default_dc_luminance_huffman_table;
635- context.dc_chrominance_huffman_table = s_default_dc_chrominance_huffman_table;
636-
637- context.ac_luminance_huffman_table = s_default_ac_luminance_huffman_table;
638- context.ac_chrominance_huffman_table = s_default_ac_chrominance_huffman_table;
639-
640831 TRY (add_start_of_image (stream));
641832
642833 if (options.icc_data .has_value ())
0 commit comments