Skip to content

Commit 2fefe63

Browse files
committed
chore: Refactor
1 parent 4203e7f commit 2fefe63

File tree

7 files changed

+137
-272
lines changed

7 files changed

+137
-272
lines changed

FileCompression.jar

-20.6 KB
Binary file not shown.

renovate.json

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/main/java/prog/huffman/HuffmanCompressor.java

Lines changed: 62 additions & 210 deletions
Original file line numberDiff line numberDiff line change
@@ -7,82 +7,18 @@
77
import java.io.FileInputStream;
88
import java.io.FileOutputStream;
99
import java.io.IOException;
10-
import java.io.PrintStream;
10+
import java.util.Arrays;
1111
import java.util.PriorityQueue;
1212

13+
import prog.lzw.LzwCompressor;
14+
1315
// NOTE: frequencies are stored as int, so a byte that occurs more than 2^31 - 1 times will overflow its counter
1416
public class HuffmanCompressor {
15-
16-
static PriorityQueue<HuffmanNode> priorityQueue = new PriorityQueue<HuffmanNode>();
17-
static int[] frequency = new int[300];
18-
static String[] huffmanCodes = new String[300];
19-
static int extraBits;
20-
static byte currentByte;
21-
static int uniqueCharCount; // number of different characters
22-
23-
// for keeping frequencies of all the bytes
24-
25-
// main tree class
26-
27-
static HuffmanNode root;
28-
29-
/*******************************************************************************
30-
* calculating the byte frequencies of the file filename
31-
******************************************************************************/
32-
public static void calculateFrequencyOfBytesInFile(String filename) {
33-
File file = new File(filename);;
34-
Byte currentByte;
35-
try {
36-
FileInputStream file_input = new FileInputStream(file);
37-
DataInputStream data_in = new DataInputStream(file_input);
38-
while (true) {
39-
try {
40-
41-
currentByte = data_in.readByte();
42-
frequency[HuffmanUtils.to(currentByte)]++;
43-
} catch (EOFException eof) {
44-
System.out.println("End of File");
45-
break;
46-
}
47-
}
48-
file_input.close();
49-
data_in.close();
50-
} catch (IOException e) {
51-
System.out.println("IO Exception =: " + e);
52-
}
53-
}
54-
55-
/************************************** ============ ************************/
56-
57-
/***********************************************************************************
58-
* byte to binary conversion
59-
***********************************************************************************/
60-
public static int to(Byte b) {
61-
int ret = b;
62-
if (ret < 0) {
63-
ret = ~b;
64-
ret = ret + 1;
65-
ret = ret ^ 255;
66-
ret += 1;
67-
}
68-
return ret;
69-
}
70-
71-
/***********************************************************************************/
72-
7317
/**********************************************************************************
7418
* freeing the memory
7519
*********************************************************************************/
76-
public static void initHuffmanCompressor() {
77-
int i;
78-
uniqueCharCount = 0;
79-
if (root != null)
80-
HuffmanUtils.fredfs(root, HuffmanUtils.HUFFMAN_TREE_ACCESSOR);
81-
for (i = 0; i < 300; i++)
82-
frequency[i] = 0;
83-
for (i = 0; i < 300; i++)
84-
huffmanCodes[i] = "";
85-
priorityQueue.clear();
20+
public static void initHuffmanCompressor(HuffmanNode root) {
21+
if (root != null) HuffmanUtils.fredfs(root, HuffmanUtils.HUFFMAN_TREE_ACCESSOR);
8622
}
8723

8824
/**********************************************************************************/
@@ -106,26 +42,26 @@ public static void fredfs(HuffmanNode node) {
10642
/**********************************************************************************
10743
* dfs to make the codes
10844
*********************************************************************************/
109-
public static void generateHuffmanCodes(HuffmanNode node, String code) {
45+
public static void generateHuffmanCodes(HuffmanNode node, String code, String[] huffmanCodes) {
11046
node.code = code;
11147
if ((node.leftChild == null) && (node.rightChild == null)) {
11248
huffmanCodes[node.byteValue] = code;
11349
return;
11450
}
11551
if (node.leftChild != null)
116-
generateHuffmanCodes(node.leftChild, code + "0");
52+
generateHuffmanCodes(node.leftChild, code + "0", huffmanCodes);
11753
if (node.rightChild != null)
118-
generateHuffmanCodes(node.rightChild, code + "1");
54+
generateHuffmanCodes(node.rightChild, code + "1", huffmanCodes);
11955
}
12056

12157
/**********************************************************************************/
12258

12359
/*******************************************************************************
12460
* Putting all the nodes in a priority queue and building the tree
12561
*******************************************************************************/
126-
public static void buildHuffmanTree() {
62+
public static HuffmanNode buildHuffmanTree(int[] frequency, String[] huffmanCodes) {
12763
int i;
128-
priorityQueue.clear();
64+
PriorityQueue<HuffmanNode> priorityQueue = new PriorityQueue<HuffmanNode>();
12965

13066
for (i = 0; i < 300; i++) {
13167
if (frequency[i] != 0) {
@@ -135,21 +71,20 @@ public static void buildHuffmanTree() {
13571
Temp.leftChild = null;
13672
Temp.rightChild = null;
13773
priorityQueue.add(Temp);
138-
uniqueCharCount++;
13974
}
14075

14176
}
14277
HuffmanNode Temp1, Temp2;
14378

144-
if (uniqueCharCount == 0) {
145-
return;
146-
} else if (uniqueCharCount == 1) {
79+
if (Arrays.stream(frequency).filter(f -> f != 0).count() == 0) {
80+
return null;
81+
} else if (Arrays.stream(frequency).filter(f -> f != 0).count() == 1) {
14782
for (i = 0; i < 300; i++)
14883
if (frequency[i] != 0) {
14984
huffmanCodes[i] = "0";
15085
break;
15186
}
152-
return;
87+
return null;
15388
}
15489

15590
// will there be a problem if the file is empty?
@@ -163,178 +98,95 @@ public static void buildHuffmanTree() {
16398
Temp.frequency = Temp1.frequency + Temp2.frequency;
16499
priorityQueue.add(Temp);
165100
}
166-
root = priorityQueue.poll();
101+
HuffmanNode root = priorityQueue.poll();
102+
return root;
167103
}
168104

169105
/*******************************************************************************/
170106

171-
/*******************************************************************************
172-
* encrypting
173-
*******************************************************************************/
174-
public static void encrypt(String filename) {
175-
File file = null;
176-
177-
file = new File(filename);
178-
try {
179-
FileInputStream file_input = new FileInputStream(file);
180-
DataInputStream data_in = new DataInputStream(file_input);
181-
while (true) {
182-
try {
183-
184-
currentByte = data_in.readByte();
185-
frequency[currentByte]++;
186-
} catch (EOFException eof) {
187-
System.out.println("End of File");
188-
break;
189-
}
190-
}
191-
file_input.close();
192-
data_in.close();
193-
194-
} catch (IOException e) {
195-
System.out.println("IO Exception =: " + e);
196-
}
197-
file = null;
198-
}
199-
200-
/*******************************************************************************/
201-
202-
/*******************************************************************************
203-
* fake zip creates a file "fakezip.txt" where puts the final binary codes
204-
* of the real zipped file
205-
*******************************************************************************/
206-
public static void fakezip(String filename) {
207107

108+
public static void zip(String filename, String filename1, int[] frequency, String[] huffmanCodes) {
208109
File filei, fileo;
209110
int i;
210-
211-
filei = new File(filename);
212-
fileo = new File("fakezipped.txt");
213-
try {
214-
FileInputStream file_input = new FileInputStream(filei);
215-
DataInputStream data_in = new DataInputStream(file_input);
216-
PrintStream ps = new PrintStream(fileo);
217-
218-
while (true) {
219-
try {
220-
currentByte = data_in.readByte();
221-
ps.print(huffmanCodes[to(currentByte)]);
222-
} catch (EOFException eof) {
223-
System.out.println("End of File");
224-
break;
225-
}
226-
}
227-
228-
file_input.close();
229-
data_in.close();
230-
ps.close();
231-
232-
} catch (IOException e) {
233-
System.out.println("IO Exception =: " + e);
234-
}
235-
filei = null;
236-
fileo = null;
237-
238-
}
239-
240-
/*******************************************************************************/
241-
242-
/*******************************************************************************
243-
* real zip according to codes of fakezip.txt (filename)
244-
*******************************************************************************/
245-
public static void realzip(String filename, String filename1) {
246-
File filei, fileo;
247-
int i, j = 10;
248111
Byte currentBytet;
249112

250113
filei = new File(filename);
251114
fileo = new File(filename1);
252115

116+
DataInputStream dataIn;
117+
DataOutputStream dataOut;
118+
253119
try {
120+
254121
FileInputStream file_input = new FileInputStream(filei);
255-
DataInputStream data_in = new DataInputStream(file_input);
122+
dataIn = new DataInputStream(file_input);
256123
FileOutputStream file_output = new FileOutputStream(fileo);
257-
DataOutputStream data_out = new DataOutputStream(file_output);
124+
dataOut = new DataOutputStream(file_output);
258125

259-
data_out.writeInt(uniqueCharCount);
126+
// Step1: Write the table size
127+
dataOut.writeInt((int)Arrays.stream(frequency).filter(f -> f != 0).count());
128+
129+
// Step2: Write the table
260130
for (i = 0; i < 256; i++) {
261131
if (frequency[i] != 0) {
262132
currentBytet = (byte) i;
263-
data_out.write(currentBytet);
264-
data_out.writeInt(frequency[i]);
133+
dataOut.write(currentBytet);
134+
dataOut.writeInt(frequency[i]);
135+
}
136+
}
137+
138+
// Step3: The encoded bit stream may need padding so that its length is a multiple of 8,
139+
// so we calculate the total number of encoded binary digits mod 8 to get the number of padding bits
140+
int totalBinaryDigitsMod8 = 0;
141+
for(i = 0; i < 256; i++) {
142+
if (huffmanCodes[i] != null) {
143+
totalBinaryDigitsMod8 += huffmanCodes[i].length() * frequency[i];
144+
totalBinaryDigitsMod8 %= 8;
265145
}
266146
}
267-
long textraBits;
268-
textraBits = filei.length() % 8;
269-
textraBits = (8 - textraBits) % 8;
270-
extraBits = (int) textraBits;
271-
data_out.writeInt(extraBits);
147+
int extraBits = (8 - totalBinaryDigitsMod8)%8;
148+
dataOut.writeInt(extraBits);
149+
150+
151+
String bitBuffer = "";
272152
while (true) {
273153
try {
274-
currentByte = 0;
275-
byte ch;
276-
for (extraBits = 0; extraBits < 8; extraBits++) {
277-
ch = data_in.readByte();
278-
currentByte *= 2;
279-
if (ch == '1')
280-
currentByte++;
154+
byte currentByte = dataIn.readByte();
155+
String huffmanCodeOfCurrentByte = huffmanCodes[HuffmanUtils.to(currentByte)];
156+
bitBuffer += huffmanCodeOfCurrentByte;
157+
while(bitBuffer.length() >= 8) {
158+
dataOut.write(LzwCompressor.stringToByte(bitBuffer.substring(0, 8)));
159+
bitBuffer = bitBuffer.substring(8, bitBuffer.length());
281160
}
282-
data_out.write(currentByte);
283-
284161
} catch (EOFException eof) {
285162
int x;
286-
if (extraBits != 0) {
287-
for (x = extraBits; x < 8; x++) {
288-
currentByte *= 2;
289-
}
290-
data_out.write(currentByte);
163+
if (bitBuffer.length() != 0) {
164+
dataOut.write(LzwCompressor.stringToByte(bitBuffer));
291165
}
292-
293-
extraBits = (int) textraBits;
294-
System.out.println("extrabits: " + extraBits);
295-
System.out.println("End of File");
296166
break;
297167
}
298168
}
299-
data_in.close();
300-
data_out.close();
169+
dataIn.close();
170+
dataOut.close();
301171
file_input.close();
302172
file_output.close();
303173
System.out.println("output file's size: " + fileo.length());
304174

305175
} catch (IOException e) {
306176
System.out.println("IO exception = " + e);
307177
}
308-
filei.delete();
309178
filei = null;
310179
fileo = null;
311180
}
312181

313-
/*******************************************************************************/
314-
315-
/*
316-
* public static void main (String[] args) { initHzipping();
317-
* CalFreq("in.txt"); // calculate the frequency of each digit MakeNode();
318-
* // makeing corresponding nodes if(uniqueCharCount>1) generateHuffmanCodes(root,""); // dfs to make the
319-
* codes fakezip("in.txt"); // fake zip file which will have the binary of
320-
* the input to fakezipped.txt file
321-
* realzip("fakezipped.txt","in.txt"+".huffz"); // making the real zip
322-
* according the fakezip.txt file initHzipping();
323-
*
324-
* }
325-
*/
326-
327182
public static void beginHuffmanCompression(String arg1) {
328-
initHuffmanCompressor();
329-
calculateFrequencyOfBytesInFile(arg1); // calculate the frequency of each digit
330-
buildHuffmanTree(); // build huffman tree from frequencies
331-
if (uniqueCharCount > 1)
332-
generateHuffmanCodes(root, ""); // dfs to make the codes
333-
fakezip(arg1); // fake zip file which will have the binary of the input
334-
// to fakezipped.txt file
335-
realzip("fakezipped.txt", arg1 + ".huffz"); // making the real zip
336-
// according the fakezip.txt
337-
// file
338-
initHuffmanCompressor();
183+
initHuffmanCompressor(null);
184+
int[] frequency = HuffmanUtils.calculateFrequencyOfBytesInFile(arg1); // calculate the frequency of each digit
185+
String[] huffmanCodes = new String[300];
186+
HuffmanNode huffmanTree = buildHuffmanTree(frequency, huffmanCodes); // build huffman tree from frequencies
187+
if (Arrays.stream(frequency).filter(f -> f != 0).count() > 1)
188+
generateHuffmanCodes(huffmanTree, "", huffmanCodes); // dfs to make the codes
189+
zip(arg1, arg1 + ".huffz", frequency, huffmanCodes); // compress the file directly
190+
initHuffmanCompressor(huffmanTree);
339191
}
340192
}

0 commit comments

Comments
 (0)