Author: Vimil Saju
How do I Compress and Decompress files using LZW Algorithm.
Answer:
Here is a simple implemntation of LZW compression/Decompression algorithm. It is
not fast and compression ratio is very small. Here is the code.
1 unit RevLZW;
2
3 interface
4
5 uses
6 sysutils, classes, dialogs, windows;
7
8 const
9 tabsize: integer = 4095;
10 copybyte: integer = 0;
11 compbyte: integer = 1;
12 endlist: integer = -1;
13 nochar: integer = -2;
14 empty: integer = -3;
15 eofchar: integer = -4;
16 bufsize: integer = 32768;
17 maxstack: integer = 4096;
18 type
19 TStringObject = record
20 prevchar: integer;
21 nextchar: integer;
22 next: integer;
23 used: boolean;
24 nused: integer;
25 flocked: boolean;
26 end;
27
28 procedure Initialize;
29 procedure Terminate;
30 function OpenInputFile(fname: string): boolean;
31 function OpenOutputFile(fname: string): boolean;
32 function getbyte: integer;
33 procedure putbyte(c: integer);
34 procedure compress;
35 procedure decompress;
36 procedure putcode(code: integer; lbyte: boolean = false);
37 function getcode: integer;
38 function GetHashCode(prevc, nextc: integer): integer;
39 function findstring(prevc, nextc: integer): integer;
40 function MakeTableEntry(prevc: integer; nextc: integer): boolean;
41 procedure push(c: integer);
42 procedure pop(var c: integer);
43 procedure InitializeStringTable;
44
45 var
46 fsize: integer;
47 fread, fwrote: integer;
48 ihandle, ohandle: integer;
49 inbufpos, outbufpos: integer;
50 objectid: integer;
51 stringtable: array[0..4095] of TstringObject;
52 inblock: array[0..65535 {32767}] of char;
53 outblock: array[0..65535 {32767}] of char;
54 stack: array[0..4095] of char;
55 stackpointer: integer;
56 rembits: integer;
57 lastbyte: boolean;
58 rembitcount: integer;
59 lzwerr: boolean;
60 imap, omap: integer;
61 implementation
62
63 function OpenInputFile(fname: string): boolean;
64 begin
65 result := true;
66 ihandle := fileopen(fname, fmShareExclusive or fmOpenRead);
67 fsize := getfilesize(ihandle, nil);
68 if fsize < 32768 then
69 fileread(ihandle, inblock, fsize)
70 else
71 fileread(ihandle, inblock, 32768);
72 if ihandle = -1 then
73 result := false;
74 end;
75
76 function OpenOutputFile(fname: string): boolean;
77 begin
78 result := true;
79 ohandle := filecreate(fname);
80 if ohandle = -1 then
81 result := false;
82 end;
83
84 function getbyte: integer;
85 begin
86 if inbufpos = 32768 then
87 begin
88 inbufpos := 0;
89 fileread(ihandle, inblock, 32768);
90 end;
91 if fread = fsize then
92 result := eofchar
93 else
94 result := integer(inblock[inbufpos]);
95 inc(inbufpos);
96 inc(fread);
97 end;
98
99 procedure putbyte(c: integer);
100 begin
101 if outbufpos = 32768 then
102 begin
103 outbufpos := 0;
104 filewrite(ohandle, outblock, 32768);
105 end;
106 outblock[outbufpos] := char(c);
107 inc(outbufpos);
108 inc(fwrote);
109 end;
110
111 procedure Initialize;
112 begin
113 inbufpos := 0;
114 outbufpos := 0;
115 fread := 0;
116 fwrote := 0;
117 objectid := 0;
118 stackpointer := 0;
119 lastbyte := false;
120 rembits := empty;
121 rembitcount := 0;
122 lzwerr := false;
123 InitializeStringtable;
124 end;
125
126 procedure InitializeStringTable;
127 var
128 i: integer;
129 begin
130 objectid := 0;
131 for i := 0 to 4095 do
132 begin
133 with stringtable[i] do
134 begin
135 if not flocked then
136 begin
137 prevchar := nochar;
138 nextchar := nochar;
139 next := endlist;
140 used := false;
141 nused := 0;
142 flocked := false;
143 end;
144 end;
145 if i <= 255 then
146 begin
147 stringtable[i].nextchar := i;
148 stringtable[i].used := true;
149 inc(objectid);
150 end;
151 end;
152 end;
153
154 procedure Terminate;
155 begin
156 if outbufpos > 0 then
157 filewrite(ohandle, outblock, outbufpos);
158 setendoffile(ohandle);
159 fileclose(ihandle);
160 fileclose(ohandle);
161 end;
162
163 function GetHashCode(prevc, nextc: integer): integer;
164 var
165 index, newindex: integer;
166 begin
167 index := ((prevc shl 5) xor nextc) and tabsize;
168 if not stringtable[index].used then
169 result := index
170 else
171 begin
172 while stringtable[index].next <> endlist do
173 index := stringtable[index].next;
174 newindex := index and tabsize;
175 while stringtable[newindex].used do
176 newindex := succ(newindex) and tabsize;
177 stringtable[index].next := newindex;
178 result := newindex;
179 end;
180 end;
181
182 function findstring(prevc, nextc: integer): integer;
183 var
184 index: integer;
185 found: boolean;
186 begin
187 result := endlist;
188 if (prevc = nochar) and (nextc <= 255) then
189 result := nextc
190 else
191 begin
192 index := ((prevc shl 5) xor nextc) and tabsize;
193 repeat
194 found := (stringtable[index].prevchar = prevc) and
195 (stringtable[index].nextchar
196 = nextc);
197 if not found then
198 index := stringtable[index].next;
199 until found or (index = endlist);
200 if found then
201 begin
202 result := index;
203 inc(stringtable[index].nused);
204 end;
205 end;
206 end;
207
208 function MakeTableEntry(prevc: integer; nextc: integer): boolean;
209 var
210 index: integer;
211 begin
212 result := true;
213 if objectid <= tabsize then
214 begin
215 index := gethashcode(prevc, nextc);
216 with stringtable[index] do
217 begin
218 prevchar := prevc;
219 nextchar := nextc;
220 used := true;
221 end;
222 inc(objectid);
223 if objectid = tabsize + 1 then
224 result := false;
225 end;
226 end;
227
228 procedure putcode(code: integer; lbyte: boolean);
229 var
230 tmpcode: integer;
231 begin
232 if stringtable[code].prevchar = nochar then
233 begin
234 if rembitcount < 7 then
235 begin
236 tmpcode := (rembits shl (8 - rembitcount)) or (copybyte shl (7 - rembitcount))
237 or ((code shr (rembitcount + 1)) and ($7F shr rembitcount));
238 putbyte(tmpcode);
239 inc(fwrote);
240 rembits := code and ($FF shr (7 - rembitcount));
241 inc(rembitcount);
242 end
243 else if rembitcount = 7 then
244 begin
245 tmpcode := (rembits shl 1) or copybyte;
246 putbyte(tmpcode);
247 inc(fwrote, 2);
248 putbyte(code);
249 rembits := empty;
250 rembitcount := 0;
251 end;
252 end
253 else
254 begin
255 tmpcode := (rembits shl (8 - rembitcount)) or (compbyte shl (7 - rembitcount))
256 or
257 (code shr (5 + rembitcount) and ($7F shr rembitcount));
258 putbyte(tmpcode);
259 inc(fwrote);
260 rembitcount := rembitcount + 5;
261 if rembitcount < 8 then
262 rembits := code and ($FF shr (8 - rembitcount));
263 if rembitcount >= 8 then
264 begin
265 rembits := (code shr (rembitcount - 8)) and $FF;
266 inc(fwrote);
267 putbyte(rembits);
268 rembitcount := rembitcount - 8;
269 rembits := code and ($FF shr (8 - rembitcount));
270 end;
271 end;
272 if lbyte and (rembitcount > 0) then
273 begin
274 tmpcode := ((rembits and ($FF shr (8 - rembitcount))) shl (8 - rembitcount));
275 putbyte(tmpcode);
276 inc(fwrote);
277 end;
278 end;
279
280 function getcode: integer;
281 var
282 part1, part2: integer;
283 iscomp: integer;
284 c1, c2: integer;
285 begin
286 result := eofchar;
287 if (fread = fsize) and (rembitcount = 0) then
288 begin
289 result := eofchar;
290 exit;
291 end;
292 if rembitcount = 0 then
293 begin
294 part1 := getbyte;
295 part2 := getbyte;
296 iscomp := (part1 shr 7) and 1;
297 if iscomp = 1 then
298 begin
299 c1 := part1 and $7F;
300 c2 := (part2 shr 3) and $1F;
301 rembits := part2 and $7;
302 rembitcount := 3;
303 result := (c1 shl 5) or c2;
304 end
305 else if iscomp = 0 then
306 begin
307 c1 := part1 and $7F;
308 c2 := (part2 shr 7) and $1;
309 result := (c1 shl 1) or c2;
310 rembits := part2 and $7F;
311 rembitcount := 7;
312 end;
313 end
314 else if rembitcount = 1 then
315 begin
316 part1 := getbyte;
317 iscomp := rembits;
318 if iscomp = 1 then
319 begin
320 part2 := getbyte;
321 c1 := part1 and $FF;
322 c2 := (part2 shr 4) and $F;
323 rembits := part2 and $F;
324 rembitcount := 4;
325 result := (c1 shl 4) or c2;
326 end
327 else if iscomp = 0 then
328 begin
329 c1 := part1 and $FF;
330 result := c1;
331 rembits := empty;
332 rembitcount := 0;
333 end;
334 end
335 else if rembitcount = 2 then
336 begin
337 part1 := getbyte;
338 iscomp := (rembits shr 1) and 1;
339 if iscomp = 1 then
340 begin
341 part2 := getbyte;
342 c1 := ((rembits and 1) shl 7) or ((part1 shr 1) and $7F);
343 c2 := ((part1 and 1) shl 3) or ((part2 shr 5) and $7);
344 rembits := part2 and $1F;
345 rembitcount := 5;
346 result := (c1 shl 4) or (c2 and $F);
347 end
348 else if iscomp = 0 then
349 begin
350 c1 := ((rembits and 1) shl 7) or ((part1 shr 1) and $7F);
351 result := c1;
352 rembits := part1 and 1;
353 rembitcount := 1;
354 end;
355 end
356 else if rembitcount = 3 then
357 begin
358 part1 := getbyte;
359 iscomp := (rembits shr 2) and 1;
360 if iscomp = 1 then
361 begin
362 part2 := getbyte;
363 c1 := ((rembits and $3) shl 6) or ((part1 shr 2) and $3F);
364 c2 := ((part1 and $3) shl 2) or ((part2 shr 6) and $3);
365 rembits := part2 and $3F;
366 rembitcount := 6;
367 result := (c1 shl 4) or (c2 and $F);
368 end
369 else if iscomp = 0 then
370 begin
371 c1 := ((rembits and $3) shl 6) or ((part1 shr 2) and $3F);
372 result := c1;
373 rembits := part1 and $3;
374 rembitcount := 2;
375 end;
376 end
377 else if rembitcount = 4 then
378 begin
379 part1 := getbyte;
380 iscomp := (rembits shr 3) and 1;
381 if iscomp = 1 then
382 begin
383 part2 := getbyte;
384 c1 := ((rembits and $7) shl 5) or ((part1 shr 3) and $1F);
385 c2 := ((part1 and $7) shl 1) or ((part2 shr 7) and $1);
386 rembits := part2 and $7F;
387 rembitcount := 7;
388 result := (c1 shl 4) or (c2 and $F);
389 end
390 else if iscomp = 0 then
391 begin
392 c1 := ((rembits and $7) shl 5) or ((part1 shr 3) and $1F);
393 result := c1;
394 rembits := part1 and $7;
395 rembitcount := 3;
396 end;
397 end
398 else if rembitcount = 5 then
399 begin
400 part1 := getbyte;
401 iscomp := (rembits shr 4) and 1;
402 if iscomp = 1 then
403 begin
404 c1 := ((rembits and $F) shl 4) or ((part1 shr 4) and $F);
405 c2 := part1 and $F;
406 rembits := empty;
407 rembitcount := 0;
408 result := (c1 shl 4) or (c2 and $F);
409 end
410 else if iscomp = 0 then
411 begin
412 c1 := ((rembits and $F) shl 4) or ((part1 shr 4) and $F);
413 result := c1;
414 rembits := part1 and $F;
415 rembitcount := 4;
416 end;
417 end
418 else if rembitcount = 6 then
419 begin
420 part1 := getbyte;
421 iscomp := (rembits shr 5) and 1;
422 if iscomp = 1 then
423 begin
424 c1 := ((rembits and $1F) shl 3) or ((part1 shr 5) and $7);
425 c2 := (part1 shr 1) and $F;
426 rembits := part1 and 1;
427 rembitcount := 1;
428 result := (c1 shl 4) or (c2 and $F);
429 end
430 else if iscomp = 0 then
431 begin
432 c1 := ((rembits and $1F) shl 3) or ((part1 shr 5) and $7);
433 result := c1;
434 rembits := part1 and $1F;
435 rembitcount := 5;
436 end;
437 end
438 else if rembitcount = 7 then
439 begin
440 part1 := getbyte;
441 iscomp := (rembits shr 6) and 1;
442 if iscomp = 1 then
443 begin
444 c1 := ((rembits and $3F) shl 2) or ((part1 shr 6) and $3);
445 c2 := (part1 shr 2) and $F;
446 rembits := part1 and $3;
447 rembitcount := 2;
448 result := (c1 shl 4) or (c2 and $F);
449 end
450 else if iscomp = 0 then
451 begin
452 c1 := ((rembits and $3F) shl 2) or ((part1 shr 6) and $3);
453 result := c1;
454 rembits := part1 and $3F;
455 rembitcount := 6;
456 end;
457 end;
458 end;
459
460 procedure compress;
461 var
462 c, wc, w: integer;
463 begin
464 initialize;
465 c := getbyte;
466 w := findstring(nochar, c);
467 c := getbyte;
468 while fread <= fsize - 1 do
469 begin
470 if lastbyte then
471 begin
472 putcode(w);
473 lastbyte := false;
474 InitializeStringtable;
475 c := getbyte;
476 w := findstring(nochar, c);
477 c := getbyte;
478 end;
479 wc := findstring(w, c);
480 if wc = endlist then
481 begin
482 lastbyte := not (MakeTableEntry(w, c));
483 putcode(w);
484 w := findstring(nochar, c);
485 end
486 else
487 w := wc;
488 if not lastbyte then
489 c := getbyte;
490 end;
491 putcode(w, true);
492 end;
493
494 procedure decompress;
495 var
496 unknown: boolean;
497 finchar, lastchar: integer;
498 code, oldcode, incode: integer;
499 c, tempc: integer;
500 begin
501 initialize;
502 unknown := false;
503 lastchar := empty;
504 oldcode := getcode;
505 code := oldcode;
506 c := stringtable[code].nextchar;
507 putbyte(c);
508 finchar := c;
509 incode := getcode;
510 while incode <> eofchar do
511 begin
512 if lastbyte then
513 begin
514 lastbyte := false;
515 InitializeStringTable;
516 stackpointer := 0;
517 unknown := false;
518 lastchar := empty;
519 oldcode := getcode;
520 code := oldcode;
521 c := stringtable[code].nextchar;
522 putbyte(c);
523 finchar := c;
524 incode := getcode;
525 end;
526 code := incode;
527 if not stringtable[code].used then
528 begin
529 lastchar := finchar;
530 code := oldcode;
531 unknown := true;
532 end;
533 while (stringtable[code].prevchar <> nochar) do
534 begin
535 push(stringtable[code].nextchar);
536 if lzwerr = true then
537 break;
538 code := stringtable[code].prevchar;
539 end;
540 if lzwerr = true then
541 break;
542 finchar := stringtable[code].nextchar;
543 putbyte(finchar);
544 pop(tempc);
545 while (tempc <> empty) do
546 begin
547 putbyte(tempc);
548 pop(tempc);
549 end;
550 if unknown then
551 begin
552 finchar := lastchar;
553 putbyte(finchar);
554 unknown := false;
555 end;
556 lastbyte := not (maketableentry(oldcode, finchar));
557 if not lastbyte then
558 begin
559 oldcode := incode;
560 incode := getcode;
561 end
562 end;
563 end;
564
565 procedure push(c: integer);
566 var
567 s: string;
568 begin
569 if stackpointer < 4096 then
570 begin
571 inc(stackpointer);
572 stack[stackpointer] := char(c);
573 end;
574 if stackpointer >= 4096 then
575 begin
576 s := 'Stack full at ' + inttostr(inbufpos);
577 lzwerr := true;
578 showmessage(s);
579 end;
580 end;
581
582 procedure pop(var c: integer);
583 begin
584 if stackpointer > 0 then
585 begin
586 c := integer(stack[stackpointer]);
587 dec(stackpointer);
588 end
589 else
590 c := empty;
591 end;
592
593 end.
594
595 //To compress the file add the following code to a button
596
597 openinputfile('C:\cdidxtmp\myfile.exe');
598 openoutputfile('C:\cdidxtmp\myfile.bak');
599 initialize;
600 compress;
601
602 //To Decompress
603
604 openinputfile('C:\cdidxtmp\myfile.bak');
605 openoutputfile('C:\cdidxtmp\myfile.exe');
606 initialize;
607 decompress;
|