77import java .io .FileInputStream ;
88import java .io .FileOutputStream ;
99import java .io .IOException ;
10- import java .io . PrintStream ;
10+ import java .util . Arrays ;
1111import java .util .PriorityQueue ;
1212
13+ import prog .lzw .LzwCompressor ;
14+
1315// If the frequency of a byte exceeds 2^31 - 1 (the int maximum), its count will overflow
1416public class HuffmanCompressor {
15-
16- static PriorityQueue <HuffmanNode > priorityQueue = new PriorityQueue <HuffmanNode >();
17- static int [] frequency = new int [300 ];
18- static String [] huffmanCodes = new String [300 ];
19- static int extraBits ;
20- static byte currentByte ;
21- static int uniqueCharCount ; // number of different characters
22-
23- // for keeping frequncies of all the bytes
24-
25- // main tree class
26-
27- static HuffmanNode root ;
28-
29- /*******************************************************************************
30- * calculating frequence of file filename
31- ******************************************************************************/
32- public static void calculateFrequencyOfBytesInFile (String filename ) {
33- File file = new File (filename );;
34- Byte currentByte ;
35- try {
36- FileInputStream file_input = new FileInputStream (file );
37- DataInputStream data_in = new DataInputStream (file_input );
38- while (true ) {
39- try {
40-
41- currentByte = data_in .readByte ();
42- frequency [HuffmanUtils .to (currentByte )]++;
43- } catch (EOFException eof ) {
44- System .out .println ("End of File" );
45- break ;
46- }
47- }
48- file_input .close ();
49- data_in .close ();
50- } catch (IOException e ) {
51- System .out .println ("IO Exception =: " + e );
52- }
53- }
54-
55- /************************************** ============ ************************/
56-
57- /***********************************************************************************
58- * byte to binary conversion
59- ***********************************************************************************/
60- public static int to (Byte b ) {
61- int ret = b ;
62- if (ret < 0 ) {
63- ret = ~b ;
64- ret = ret + 1 ;
65- ret = ret ^ 255 ;
66- ret += 1 ;
67- }
68- return ret ;
69- }
70-
71- /***********************************************************************************/
72-
7317 /**********************************************************************************
7418 * freeing the memory
7519 *********************************************************************************/
76- public static void initHuffmanCompressor () {
77- int i ;
78- uniqueCharCount = 0 ;
79- if (root != null )
80- HuffmanUtils .fredfs (root , HuffmanUtils .HUFFMAN_TREE_ACCESSOR );
81- for (i = 0 ; i < 300 ; i ++)
82- frequency [i ] = 0 ;
83- for (i = 0 ; i < 300 ; i ++)
84- huffmanCodes [i ] = "" ;
85- priorityQueue .clear ();
20+ public static void initHuffmanCompressor (HuffmanNode root ) {
21+ if (root != null ) HuffmanUtils .fredfs (root , HuffmanUtils .HUFFMAN_TREE_ACCESSOR );
8622 }
8723
8824 /**********************************************************************************/
@@ -106,26 +42,26 @@ public static void fredfs(HuffmanNode node) {
10642 /**********************************************************************************
10743 * dfs to make the codes
10844 *********************************************************************************/
109- public static void generateHuffmanCodes (HuffmanNode node , String code ) {
45+ public static void generateHuffmanCodes (HuffmanNode node , String code , String [] huffmanCodes ) {
11046 node .code = code ;
11147 if ((node .leftChild == null ) && (node .rightChild == null )) {
11248 huffmanCodes [node .byteValue ] = code ;
11349 return ;
11450 }
11551 if (node .leftChild != null )
116- generateHuffmanCodes (node .leftChild , code + "0" );
52+ generateHuffmanCodes (node .leftChild , code + "0" , huffmanCodes );
11753 if (node .rightChild != null )
118- generateHuffmanCodes (node .rightChild , code + "1" );
54+ generateHuffmanCodes (node .rightChild , code + "1" , huffmanCodes );
11955 }
12056
12157 /**********************************************************************************/
12258
12359 /*******************************************************************************
12460 * Putting all the nodes in a priority queue and building the tree
12561 *******************************************************************************/
126- public static void buildHuffmanTree () {
62+ public static HuffmanNode buildHuffmanTree (int [] frequency , String [] huffmanCodes ) {
12763 int i ;
128- priorityQueue . clear ();
64+ PriorityQueue < HuffmanNode > priorityQueue = new PriorityQueue < HuffmanNode > ();
12965
13066 for (i = 0 ; i < 300 ; i ++) {
13167 if (frequency [i ] != 0 ) {
@@ -135,21 +71,20 @@ public static void buildHuffmanTree() {
13571 Temp .leftChild = null ;
13672 Temp .rightChild = null ;
13773 priorityQueue .add (Temp );
138- uniqueCharCount ++;
13974 }
14075
14176 }
14277 HuffmanNode Temp1 , Temp2 ;
14378
144- if (uniqueCharCount == 0 ) {
145- return ;
146- } else if (uniqueCharCount == 1 ) {
79+ if (Arrays . stream ( frequency ). filter ( f -> f != 0 ). count () == 0 ) {
80+ return null ;
81+ } else if (Arrays . stream ( frequency ). filter ( f -> f != 0 ). count () == 1 ) {
14782 for (i = 0 ; i < 300 ; i ++)
14883 if (frequency [i ] != 0 ) {
14984 huffmanCodes [i ] = "0" ;
15085 break ;
15186 }
152- return ;
87+ return null ;
15388 }
15489
15590 // will there be a problem if the file is empty?
@@ -163,178 +98,95 @@ public static void buildHuffmanTree() {
16398 Temp .frequency = Temp1 .frequency + Temp2 .frequency ;
16499 priorityQueue .add (Temp );
165100 }
166- root = priorityQueue .poll ();
101+ HuffmanNode root = priorityQueue .poll ();
102+ return root ;
167103 }
168104
169105 /*******************************************************************************/
170106
171- /*******************************************************************************
172- * encrypting
173- *******************************************************************************/
174- public static void encrypt (String filename ) {
175- File file = null ;
176-
177- file = new File (filename );
178- try {
179- FileInputStream file_input = new FileInputStream (file );
180- DataInputStream data_in = new DataInputStream (file_input );
181- while (true ) {
182- try {
183-
184- currentByte = data_in .readByte ();
185- frequency [currentByte ]++;
186- } catch (EOFException eof ) {
187- System .out .println ("End of File" );
188- break ;
189- }
190- }
191- file_input .close ();
192- data_in .close ();
193-
194- } catch (IOException e ) {
195- System .out .println ("IO Exception =: " + e );
196- }
197- file = null ;
198- }
199-
200- /*******************************************************************************/
201-
202- /*******************************************************************************
203- * fake zip creates a file "fakezip.txt" where puts the final binary codes
204- * of the real zipped file
205- *******************************************************************************/
206- public static void fakezip (String filename ) {
207107
108+ public static void zip (String filename , String filename1 , int [] frequency , String [] huffmanCodes ) {
208109 File filei , fileo ;
209110 int i ;
210-
211- filei = new File (filename );
212- fileo = new File ("fakezipped.txt" );
213- try {
214- FileInputStream file_input = new FileInputStream (filei );
215- DataInputStream data_in = new DataInputStream (file_input );
216- PrintStream ps = new PrintStream (fileo );
217-
218- while (true ) {
219- try {
220- currentByte = data_in .readByte ();
221- ps .print (huffmanCodes [to (currentByte )]);
222- } catch (EOFException eof ) {
223- System .out .println ("End of File" );
224- break ;
225- }
226- }
227-
228- file_input .close ();
229- data_in .close ();
230- ps .close ();
231-
232- } catch (IOException e ) {
233- System .out .println ("IO Exception =: " + e );
234- }
235- filei = null ;
236- fileo = null ;
237-
238- }
239-
240- /*******************************************************************************/
241-
242- /*******************************************************************************
243- * real zip according to codes of fakezip.txt (filename)
244- *******************************************************************************/
245- public static void realzip (String filename , String filename1 ) {
246- File filei , fileo ;
247- int i , j = 10 ;
248111 Byte currentBytet ;
249112
250113 filei = new File (filename );
251114 fileo = new File (filename1 );
252115
116+ DataInputStream dataIn ;
117+ DataOutputStream dataOut ;
118+
253119 try {
120+
254121 FileInputStream file_input = new FileInputStream (filei );
255- DataInputStream data_in = new DataInputStream (file_input );
122+ dataIn = new DataInputStream (file_input );
256123 FileOutputStream file_output = new FileOutputStream (fileo );
257- DataOutputStream data_out = new DataOutputStream (file_output );
124+ dataOut = new DataOutputStream (file_output );
258125
259- data_out .writeInt (uniqueCharCount );
126+ // Step1: Write the table size
127+ dataOut .writeInt ((int )Arrays .stream (frequency ).filter (f -> f != 0 ).count ());
128+
129+ // Step2: Write the table
260130 for (i = 0 ; i < 256 ; i ++) {
261131 if (frequency [i ] != 0 ) {
262132 currentBytet = (byte ) i ;
263- data_out .write (currentBytet );
264- data_out .writeInt (frequency [i ]);
133+ dataOut .write (currentBytet );
134+ dataOut .writeInt (frequency [i ]);
135+ }
136+ }
137+
138+ // Step3: Because the encoded bit stream may need padding to make its length a multiple of 8,
139+ // we need to calculate the total number of encoded binary digits mod 8
140+ int totalBinaryDigitsMod8 = 0 ;
141+ for (i = 0 ; i < 256 ; i ++) {
142+ if (huffmanCodes [i ] != null ) {
143+ totalBinaryDigitsMod8 += huffmanCodes [i ].length () * frequency [i ];
144+ totalBinaryDigitsMod8 %= 8 ;
265145 }
266146 }
267- long textraBits ;
268- textraBits = filei . length () % 8 ;
269- textraBits = ( 8 - textraBits ) % 8 ;
270- extraBits = ( int ) textraBits ;
271- data_out . writeInt ( extraBits ) ;
147+ int extraBits = ( 8 - totalBinaryDigitsMod8 )% 8 ;
148+ dataOut . writeInt ( extraBits ) ;
149+
150+
151+ String bitBuffer = "" ;
272152 while (true ) {
273153 try {
274- currentByte = 0 ;
275- byte ch ;
276- for (extraBits = 0 ; extraBits < 8 ; extraBits ++) {
277- ch = data_in .readByte ();
278- currentByte *= 2 ;
279- if (ch == '1' )
280- currentByte ++;
154+ byte currentByte = dataIn .readByte ();
155+ String huffmanCodeOfCurrentByte = huffmanCodes [HuffmanUtils .to (currentByte )];
156+ bitBuffer += huffmanCodeOfCurrentByte ;
157+ while (bitBuffer .length () >= 8 ) {
158+ dataOut .write (LzwCompressor .stringToByte (bitBuffer .substring (0 , 8 )));
159+ bitBuffer = bitBuffer .substring (8 , bitBuffer .length ());
281160 }
282- data_out .write (currentByte );
283-
284161 } catch (EOFException eof ) {
285162 int x ;
286- if (extraBits != 0 ) {
287- for (x = extraBits ; x < 8 ; x ++) {
288- currentByte *= 2 ;
289- }
290- data_out .write (currentByte );
163+ if (bitBuffer .length () != 0 ) {
164+ dataOut .write (LzwCompressor .stringToByte (bitBuffer ));
291165 }
292-
293- extraBits = (int ) textraBits ;
294- System .out .println ("extrabits: " + extraBits );
295- System .out .println ("End of File" );
296166 break ;
297167 }
298168 }
299- data_in .close ();
300- data_out .close ();
169+ dataIn .close ();
170+ dataOut .close ();
301171 file_input .close ();
302172 file_output .close ();
303173 System .out .println ("output file's size: " + fileo .length ());
304174
305175 } catch (IOException e ) {
306176 System .out .println ("IO exception = " + e );
307177 }
308- filei .delete ();
309178 filei = null ;
310179 fileo = null ;
311180 }
312181
313- /*******************************************************************************/
314-
315- /*
316- * public static void main (String[] args) { initHzipping();
317- * CalFreq("in.txt"); // calculate the frequency of each digit MakeNode();
318- * // makeing corresponding nodes if(uniqueCharCount>1) generateHuffmanCodes(root,""); // dfs to make the
319- * codes fakezip("in.txt"); // fake zip file which will have the binary of
320- * the input to fakezipped.txt file
321- * realzip("fakezipped.txt","in.txt"+".huffz"); // making the real zip
322- * according the fakezip.txt file initHzipping();
323- *
324- * }
325- */
326-
327182 public static void beginHuffmanCompression (String arg1 ) {
328- initHuffmanCompressor ();
329- calculateFrequencyOfBytesInFile (arg1 ); // calculate the frequency of each digit
330- buildHuffmanTree (); // build huffman tree from frequencies
331- if (uniqueCharCount > 1 )
332- generateHuffmanCodes (root , "" ); // dfs to make the codes
333- fakezip (arg1 ); // fake zip file which will have the binary of the input
334- // to fakezipped.txt file
335- realzip ("fakezipped.txt" , arg1 + ".huffz" ); // making the real zip
336- // according the fakezip.txt
337- // file
338- initHuffmanCompressor ();
183+ initHuffmanCompressor (null );
184+ int [] frequency = HuffmanUtils .calculateFrequencyOfBytesInFile (arg1 ); // calculate the frequency of each digit
185+ String [] huffmanCodes = new String [300 ];
186+ HuffmanNode huffmanTree = buildHuffmanTree (frequency , huffmanCodes ); // build huffman tree from frequencies
187+ if (Arrays .stream (frequency ).filter (f -> f != 0 ).count () > 1 )
188+ generateHuffmanCodes (huffmanTree , "" , huffmanCodes ); // dfs to make the codes
189+ zip (arg1 , arg1 + ".huffz" , frequency , huffmanCodes ); // compress the file directly
190+ initHuffmanCompressor (huffmanTree );
339191 }
340192}
0 commit comments