forked from MateuszBartosiewicz/bzip2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBZip2MTFAndRLE2StageEncoder.java
More file actions
192 lines (146 loc) · 4.34 KB
/
BZip2MTFAndRLE2StageEncoder.java
File metadata and controls
192 lines (146 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
package org.itadaki.bzip2;
/**
* An encoder for the BZip2 Move To Front Transform and Run-Length Encoding[2] stages<br>
* Although conceptually these two stages are separate, it is computationally efficient to perform
* them in one pass.
*/
public class BZip2MTFAndRLE2StageEncoder {
/**
* The Burrows-Wheeler transformed block
*/
private final int[] bwtBlock;
/**
* Actual length of the data in the {@link bwtBlock} array
*/
private int bwtLength;
/**
* At each position, {@code true} if the byte value with that index is present within the block,
* otherwise {@code false}
*/
private final boolean[] bwtValuesInUse;
/**
* The output of the Move To Front Transform and Run-Length Encoding[2] stages
*/
private final char[] mtfBlock;
/**
* The actual number of values contained in the {@link mtfBlock} array
*/
private int mtfLength;
/**
* The global frequencies of values within the {@link mtfBlock} array
*/
private final int[] mtfSymbolFrequencies = new int[BZip2Constants.HUFFMAN_MAXIMUM_ALPHABET_SIZE];
/**
* The encoded alphabet size
*/
private int alphabetSize;
/**
* Performs the Move To Front transform and Run Length Encoding[1] stages
*/
public void encode() {
final int bwtLength = this.bwtLength;
final boolean[] bwtValuesInUse = this.bwtValuesInUse;
final int[] bwtBlock = this.bwtBlock;
final char[] mtfBlock = this.mtfBlock;
final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies;
final byte[] huffmanSymbolMap = new byte[256];
final MoveToFront symbolMTF = new MoveToFront();
int totalUniqueValues = 0;
for (int i = 0; i < 256; i++) {
if (bwtValuesInUse[i]) {
huffmanSymbolMap[i] = (byte) totalUniqueValues++;
}
}
final int endOfBlockSymbol = totalUniqueValues + 1;
int mtfIndex = 0;
int repeatCount = 0;
int totalRunAs = 0;
int totalRunBs = 0;
for (int i = 0; i < bwtLength; i++) {
// Move To Front
final int mtfPosition = symbolMTF.valueToFront (huffmanSymbolMap[bwtBlock[i] & 0xff]);
// Run Length Encode
if (mtfPosition == 0) {
repeatCount++;
} else {
if (repeatCount > 0) {
repeatCount--;
while (true) {
if ((repeatCount & 1) == 0) {
mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNA;
totalRunAs++;
} else {
mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNB;
totalRunBs++;
}
if (repeatCount <= 1) {
break;
}
repeatCount = (repeatCount - 2) >>> 1;
}
repeatCount = 0;
}
mtfBlock[mtfIndex++] = (char) (mtfPosition + 1);
mtfSymbolFrequencies[mtfPosition + 1]++;
}
}
if (repeatCount > 0) {
repeatCount--;
while (true) {
if ((repeatCount & 1) == 0) {
mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNA;
totalRunAs++;
} else {
mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNB;
totalRunBs++;
}
if (repeatCount <= 1) {
break;
}
repeatCount = (repeatCount - 2) >>> 1;
}
}
mtfBlock[mtfIndex] = (char) endOfBlockSymbol;
mtfSymbolFrequencies[endOfBlockSymbol]++;
mtfSymbolFrequencies[BZip2Constants.HUFFMAN_SYMBOL_RUNA] += totalRunAs;
mtfSymbolFrequencies[BZip2Constants.HUFFMAN_SYMBOL_RUNB] += totalRunBs;
this.mtfLength = mtfIndex + 1;
this.alphabetSize = endOfBlockSymbol + 1;
}
/**
* @return The encoded MTF block
*/
public char[] getMtfBlock() {
return this.mtfBlock;
}
/**
* @return The actual length of the MTF block
*/
public int getMtfLength() {
return this.mtfLength;
}
/**
* @return The size of the MTF block's alphabet
*/
public int getMtfAlphabetSize() {
return this.alphabetSize;
}
/**
* @return The frequencies of the MTF block's symbols
*/
public int[] getMtfSymbolFrequencies() {
return this.mtfSymbolFrequencies;
}
/**
* @param bwtBlock The Burrows Wheeler Transformed block data
* @param bwtLength The actual length of the BWT data
* @param bwtValuesPresent The values that are present within the BWT data. For each index,
* {@code true} if that value is present within the data, otherwise {@code false}
*/
public BZip2MTFAndRLE2StageEncoder (final int[] bwtBlock, final int bwtLength, final boolean[] bwtValuesPresent) {
this.bwtBlock = bwtBlock;
this.bwtLength = bwtLength;
this.bwtValuesInUse = bwtValuesPresent;
this.mtfBlock = new char[bwtLength + 1];
}
}