Skip to content

Commit 447d378

Browse files
committed
Compression ratio improvement
1 parent a5531ae commit 447d378

File tree

1 file changed

+39
-46
lines changed

1 file changed

+39
-46
lines changed

src/transform/LZCodec.cpp

Lines changed: 39 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ const uint LZXCodec<true>::HASH_LOG = 21;
9393
template<>
9494
const uint LZXCodec<true>::HASH_RSHIFT = 64 - HASH_LOG;
9595
template<>
96-
const uint LZXCodec<true>::HASH_LSHIFT = 16;
96+
const uint LZXCodec<true>::HASH_LSHIFT = 24;
9797
template<>
9898
const int LZXCodec<true>::MAX_DISTANCE1 = (1 << 16) - 2;
9999
template<>
@@ -168,7 +168,6 @@ bool LZXCodec<T>::forward(SliceArray<byte>& input, SliceArray<byte>& output, int
168168
}
169169

170170
const int minMatch = mm;
171-
const int dThreshold = (maxDist == MAX_DISTANCE1) ? 1 << 8 : 1 << 16;
172171
int srcIdx = 0;
173172
int dstIdx = 13;
174173
int anchor = 0;
@@ -227,7 +226,7 @@ bool LZXCodec<T>::forward(SliceArray<byte>& input, SliceArray<byte>& output, int
227226
const int bestLen1 = findMatch(src, srcIdx1, ref1, min(srcEnd - srcIdx1, MAX_MATCH));
228227

229228
// Select best match
230-
if ((bestLen1 > bestLen) || ((bestLen1 == bestLen) && (ref1 > ref))) {
229+
if (bestLen1 >= bestLen) {
231230
ref = ref1;
232231
bestLen = bestLen1;
233232
srcIdx = srcIdx1;
@@ -257,52 +256,51 @@ bool LZXCodec<T>::forward(SliceArray<byte>& input, SliceArray<byte>& output, int
257256
// Emit match
258257
srcInc = 0;
259258

260-
// Token: 3 bits litLen + 1 bit flag + 4 bits mLen (LLLFMMMM)
261-
// LLL : <= 7 --> LLL == literal length (if 7, remainder encoded outside of token)
262-
// MMMM : <= 14 --> MMMM == match length (if 14, remainder encoded outside of token)
263-
// == 15 if dist == repd0 or repd1 && matchLen fully encoded outside of token
264-
// F : if MMMM == 15, flag = 0 if dist == repd0 and 1 if dist == repd1
265-
// else flag = 1 if dist >= dThreshold and 0 otherwise
259+
// Token: 3 bits litLen + 2 bits flag + 3 bits mLen (LLLFFMMM)
260+
// LLL : <= 7 --> LLL == literal length (if 7, remainder encoded outside of token)
261+
// MMM : <= 6 --> MMM == match length (if 6, remainder encoded outside of token)
262+
// FF : if MMM == 7
263+
// FF = 00 if dist == repd0
264+
// FF = 01 if dist == repd1
265+
// else
266+
// FF=00 => 1 byte dist
267+
// FF=01 => 2 byte dist
268+
// FF=10 => unused (never emitted: a 3 byte dist always sets both flag bits)
269+
// FF=11 => 3 byte dist
266270
const int dist = srcIdx - ref;
267271
int token;
268272

269273
if (dist == repd[0]) {
270-
token = 0x0F;
274+
token = 0x07;
271275
mLenIdx += emitLength(&_mLenBuf[mLenIdx], bestLen - minMatch);
272276
}
273277
else if (dist == repd[1]) {
274-
token = 0x1F;
278+
token = 0x0F;
275279
mLenIdx += emitLength(&_mLenBuf[mLenIdx], bestLen - minMatch);
276280
}
277281
else {
278282
// Emit distance (since not repeat)
279-
if (maxDist == MAX_DISTANCE2) {
280-
_mBuf[mIdx] = byte(dist >> 16);
281-
mIdx += ((dist >= 65536) ? 1 : 0);
282-
_mBuf[mIdx++] = byte(dist >> 8);
283-
}
284-
else {
285-
_mBuf[mIdx] = byte(dist >> 8);
286-
mIdx += ((dist >= 256) ? 1 : 0);
283+
int flag = 0;
284+
285+
if (dist >= 65536 ) {
286+
_mBuf[mIdx++] = byte(dist >> 16);
287+
flag = 2;
287288
}
288289

290+
_mBuf[mIdx] = byte(dist >> 8);
291+
const int inc = (dist >= 256 ? 1 : 0);
292+
mIdx += inc;
293+
flag += inc;
289294
_mBuf[mIdx++] = byte(dist);
290295
const int mLen = bestLen - minMatch;
291296

292297
// Emit match length
293-
if (mLen >= 14) {
294-
if (mLen == 14) {
295-
// Avoid the penalty of one extra byte to encode match length
296-
token = (dist >= dThreshold) ? 0x1D : 0x0D;
297-
bestLen--;
298-
}
299-
else {
300-
token = (dist >= dThreshold) ? 0x1E : 0x0E;
301-
mLenIdx += emitLength(&_mLenBuf[mLenIdx], mLen - 14);
302-
}
298+
if (mLen >= 6) {
299+
token = 6 + (flag << 3);
300+
mLenIdx += emitLength(&_mLenBuf[mLenIdx], mLen - 6);
303301
}
304302
else {
305-
token = (dist >= dThreshold) ? mLen + 16 : mLen;
303+
token = mLen + (flag << 3);
306304
}
307305
}
308306

@@ -428,8 +426,7 @@ bool LZXCodec<T>::inverse(SliceArray<byte>& input, SliceArray<byte>& output, int
428426
mLenIdx += mIdx;
429427

430428
const int srcEnd = tkIdx - 13;
431-
const int mFlag = int(src[12]) & 1;
432-
const int maxDist = (mFlag == 0) ? MAX_DISTANCE1 : MAX_DISTANCE2;
429+
const int maxDist = ((int(src[12]) & 1) == 0) ? MAX_DISTANCE1 : MAX_DISTANCE2;
433430
const int mmIdx = (int(src[12]) >> 1) & 0x03;
434431
const int MIN_MATCHES[4] = { MIN_MATCH4, MIN_MATCH9, MIN_MATCH6, MIN_MATCH6 };
435432
const int minMatch = MIN_MATCHES[mmIdx];
@@ -461,28 +458,24 @@ bool LZXCodec<T>::inverse(SliceArray<byte>& input, SliceArray<byte>& output, int
461458
}
462459

463460
// Get match length and distance
464-
int mLen = token & 0x0F;
461+
int mLen = token & 0x07;
465462
int dist;
466463

467-
if (mLen == 15) {
464+
if (mLen == 7) {
468465
// Repetition distance, read mLen fully outside of token
469466
mLen = minMatch + readLength(src, mLenIdx);
470-
dist = ((token & 0x10) == 0) ? repd0 : repd1;
467+
dist = ((token & 0x08) == 0) ? repd0 : repd1;
471468
}
472469
else {
473470
// Read mLen remainder (if any) outside of token
474-
mLen = (mLen == 14) ? 14 + minMatch + readLength(src, mLenIdx) : mLen + minMatch;
471+
mLen = (mLen == 6) ? 6 + minMatch + readLength(src, mLenIdx) : mLen + minMatch;
475472
dist = int(src[mIdx++]);
476-
477-
if (mFlag != 0)
478-
dist = (dist << 8) | int(src[mIdx++]);
479-
480-
//if ((token & 0x10) != 0) {
481-
// dist = (dist << 8) | int(src[mIdx++]);
482-
//}
483-
const int t = (token >> 4) & 1;
484-
dist = (dist << (8 * t)) | (-t & int(src[mIdx]));
485-
mIdx += t;
473+
const int f1 = (token >> 3) & 1;
474+
dist = (dist << (8 * f1)) | (-f1 & int(src[mIdx]));
475+
mIdx += f1;
476+
const int f2 = (token >> 4) & 1;
477+
dist = (dist << (8 * f2)) | (-f2 & int(src[mIdx]));
478+
mIdx += f2;
486479
}
487480

488481
prefetchRead(&src[mLenIdx]);

0 commit comments

Comments
 (0)