1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
\r
3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
\r
5 ; * Copyright (C) 1995-2003 Mark Adler
\r
6 ; * For conditions of distribution and use, see copyright notice in zlib.h
\r
8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
\r
9 ; * Please use the copyright conditions above.
\r
11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
\r
12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
\r
13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
\r
14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
\r
15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
\r
16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
\r
17 ; * versions of this and inffast.S can be found at
\r
18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
\r
20 ; * 2005 : modification by Gilles Vollant
\r
22 ; For Visual C++ 4.x and higher and ML 6.x and higher
\r
23 ; ml.exe is in directory \MASM611C of Win95 DDK
\r
24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
\r
25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
\r
28 ; compile with command line option
\r
29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
\r
31 ; if you define NO_GZIP (see inflate.h), compile with
\r
32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
\r
35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
\r
36 ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with addition of dmax and head
\r
37 ; in inflate_state in inflate.h)
\r
; Numeric codes written to the mode field of the inflate state.
; The exit paths of this file load INFLATE_MODE_TYPE or INFLATE_MODE_BAD into
; edx, and L_update_stream_state stores edx to [eax+mode_state].
; NOTE(review): three alternative pairs are listed below. The conditional
; assembly directives that select exactly one of them are not visible in this
; excerpt. The file header mentions /DNO_GUNZIP, so presumably the 3/17 pair
; is the build without gzip support -- confirm against inflate.h.
42 INFLATE_MODE_TYPE equ 11
\r
43 INFLATE_MODE_BAD equ 26
\r
46 INFLATE_MODE_TYPE equ 11
\r
47 INFLATE_MODE_BAD equ 26
\r
49 INFLATE_MODE_TYPE equ 3
\r
50 INFLATE_MODE_BAD equ 17
\r
58 ;;;GLOBAL _inflate_fast
\r
; Module preamble: module name, the MMX mode variable, the exported symbol and
; the text strings embedded ahead of the code proper.
67 name inflate_fast_x86
\r
; Label of the dword that this file compares with 2 and sets to 2 or 3.
; NOTE(review): the data definition that follows this label is not visible in
; this excerpt.
71 inflate_fast_use_mmx:
\r
; Export _inflate_fast to the linker.
76 PUBLIC _inflate_fast
\r
; Skip the strings below; inflate_fast_entry is defined after them.
80 jmp inflate_fast_entry
\r
; Author credit text. No instruction in this excerpt references it.
85 db 'Fast decoding Code from Chris Anderson'
\r
; Error texts. Each label is loaded into ecx by the matching error handler
; (L_invalid_literal_length_code, L_invalid_distance_code,
; L_invalid_distance_too_far).
; NOTE(review): no terminating zero byte is visible after these strings in
; this excerpt -- confirm that one follows each db in the full file.
89 invalid_literal_length_code_msg:
\r
90 db 'invalid literal/length code'
\r
94 invalid_distance_code_msg:
\r
95 db 'invalid distance code'
\r
99 invalid_distance_too_far_msg:
\r
100 db 'invalid distance too far back'
\r
; Byte offsets of the inflate state fields that this routine reads or writes.
; Every offset except mode_state includes zlib1222sup; write_state and all
; fields after it carry an additional explicit +4.
; NOTE(review): these are hard-coded and must track the layout of the state
; structure declared in inflate.h -- verify whenever that structure changes.
141 mode_state equ 0 ;/* state->mode */
\r
142 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
\r
143 write_state equ (36+4+zlib1222sup) ;/* state->write */
\r
144 window_state equ (40+4+zlib1222sup) ;/* state->window */
\r
145 hold_state equ (44+4+zlib1222sup) ;/* state->hold */
\r
146 bits_state equ (48+4+zlib1222sup) ;/* state->bits */
\r
147 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
\r
148 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
\r
149 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
\r
150 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
\r
155 ;GLOBAL inflate_fast_use_mmx
\r
160 ; GLOBAL inflate_fast_use_mmx:object
\r
161 ;.size inflate_fast_use_mmx, 4
\r
; Target of the jmp placed ahead of the embedded strings.
; The visible instructions copy fields out of the state structure addressed by
; edi. ecx is reloaded several times; the instructions that consume each value
; between the loads are not visible in this excerpt.
166 inflate_fast_entry:
\r
211 mov eax, [edi+lencode_state] ; eax = state->lencode
\r
212 mov ecx, [edi+distcode_state] ; ecx = state->distcode
\r
218 mov ecx, [edi+lenbits_state] ; ecx = state->lenbits
\r
224 mov ecx, [edi+distbits_state] ; ecx = state->distbits
\r
229 mov eax, [edi+wsize_state] ; eax = state->wsize
\r
230 mov ecx, [edi+write_state] ; ecx = state->write
\r
231 mov edx, [edi+window_state] ; edx = state->window
\r
237 mov ebp, [edi+hold_state] ; ebp = state->hold
\r
238 mov ebx, [edi+bits_state] ; ebx = state->bits
\r
; Fragments of the code that decides the value of inflate_fast_use_mmx.
; NOTE(review): presumably 2 selects the MMX loop and 3 the plain loop; the
; branches that act on the comparison are not visible here -- confirm.
275 cmp dword ptr [inflate_fast_use_mmx],2 ; is the mode variable already 2?
\r
; Toggle bit 21 (0200000h) of the dword at the top of the stack.
; NOTE(review): looks like a saved EFLAGS image whose ID bit is being flipped
; to probe for CPUID support -- confirm against the surrounding pushfd/popfd.
285 xor dword ptr [esp],0200000h
\r
313 mov dword ptr [inflate_fast_use_mmx],2 ; record outcome 2
\r
314 jmp L_check_mmx_pop
\r
316 mov dword ptr [inflate_fast_use_mmx],3 ; record outcome 3
\r
; Fragments of the decode loop whose labels carry no _mmx suffix.
; Only branches, labels and table loads are visible in this excerpt; the
; compare/test instructions that set the flags for each branch are not.
; All conditional jumps on ordering here (ja/jae/jb/jbe) are the unsigned forms.
328 ja L_get_length_code
\r
341 mov eax, [ecx+edx*4] ; fetch 4-byte table entry: base ecx, index edx
\r
360 jnz L_test_for_length_base
\r
375 L_test_for_length_base:
\r
382 jz L_test_for_second_level_length
\r
386 jae L_add_bits_to_len
\r
413 ja L_get_distance_code
\r
422 L_get_distance_code:
\r
426 mov eax, [ecx+edx*4] ; fetch 4-byte table entry: base ecx, index edx
\r
439 jz L_test_for_second_level_dist
\r
441 jz L_check_dist_one
\r
443 jae L_add_bits_to_dist
\r
454 L_add_bits_to_dist:
\r
512 L_test_for_second_level_length:
\r
518 jnz L_test_for_end_of_block ; leave the loop via the end-of-block handler
\r
526 mov eax, [edx+eax*4] ; fetch 4-byte table entry: base edx, index eax
\r
530 L_test_for_second_level_dist:
\r
536 jnz L_invalid_distance_code ; error exit
\r
544 mov eax, [edx+eax*4] ; fetch 4-byte table entry: base edx, index eax
\r
556 jb L_invalid_distance_too_far ; error exit
\r
; A nonzero dword at [esp+48] selects the wrap-around path.
; NOTE(review): which local lives at [esp+48] is not visible in this excerpt.
559 cmp dword ptr [esp+48],0
\r
560 jne L_wrap_around_window
\r
584 L_wrap_around_window:
\r
588 jbe L_contiguous_in_window
\r
613 L_contiguous_in_window:
\r
; Fragments of the decode loop that uses MMX registers (labels end in _mmx).
; Only branches, labels, table loads and a few MMX loads are visible in this
; excerpt; the instructions that set the flags for each branch are not.
; inflate_fast_mask is indexed with a *4 scale, i.e. as a table of dwords;
; its definition is not visible in this excerpt.
647 movd mm4,dword ptr [esp+0] ; mm4 = dword local at [esp+0]
\r
649 movd mm5,dword ptr [esp+4] ; mm5 = dword local at [esp+4]
\r
660 ja L_get_length_code_mmx
\r
663 movd mm7,dword ptr [esi] ; mm7 = 32 bits read from [esi]
\r
669 L_get_length_code_mmx:
\r
673 mov eax, [ebx+eax*4] ; fetch 4-byte table entry: base ebx, index eax
\r
681 jnz L_test_for_length_base_mmx
\r
696 L_test_for_length_base_mmx:
\r
702 jz L_test_for_second_level_length_mmx
\r
704 jz L_decode_distance_mmx
\r
710 and ecx, [inflate_fast_mask+eax*4] ; mask ecx with table entry number eax
\r
713 L_decode_distance_mmx:
\r
717 ja L_get_dist_code_mmx
\r
720 movd mm7,dword ptr [esi] ; mm7 = 32 bits read from [esi]
\r
726 L_get_dist_code_mmx:
\r
731 mov eax, [ebx+eax*4] ; fetch 4-byte table entry: base ebx, index eax
\r
742 jz L_test_for_second_level_dist_mmx
\r
744 jz L_check_dist_one_mmx
\r
746 L_add_bits_to_dist_mmx:
\r
751 and ecx, [inflate_fast_mask+eax*4] ; mask ecx with table entry number eax
\r
754 L_check_window_mmx:
\r
760 jb L_clip_window_mmx ; target label is not visible in this excerpt
\r
779 jmp L_while_test_mmx ; back to the loop test
\r
782 L_check_dist_one_mmx:
\r
; Both branches below go to L_check_window_mmx; they follow two different
; comparisons that are not visible in this excerpt.
784 jne L_check_window_mmx
\r
786 je L_check_window_mmx
\r
800 jmp L_while_test_mmx ; back to the loop test
\r
803 L_test_for_second_level_length_mmx:
\r
805 jnz L_test_for_end_of_block ; shared with the non-MMX loop
\r
810 and ecx, [inflate_fast_mask+eax*4] ; mask ecx with table entry number eax
\r
812 mov eax, [ebx+ecx*4] ; fetch 4-byte table entry: base ebx, index ecx
\r
816 L_test_for_second_level_dist_mmx:
\r
818 jnz L_invalid_distance_code ; error exit shared with the non-MMX loop
\r
823 and ecx, [inflate_fast_mask+eax*4] ; mask ecx with table entry number eax
\r
; The base register here is eax, not ebx as in the other table loads.
; NOTE(review): eax must have been reloaded with a table base by an
; instruction not visible in this excerpt -- confirm.
826 mov eax, [eax+ecx*4]
\r
838 jb L_invalid_distance_too_far ; error exit shared with the non-MMX loop
\r
; A nonzero dword at [esp+48] selects the wrap-around path.
841 cmp dword ptr [esp+48],0
\r
842 jne L_wrap_around_window_mmx
\r
865 L_wrap_around_window_mmx:
\r
869 jbe L_contiguous_in_window_mmx
\r
893 L_contiguous_in_window_mmx:
\r
915 jmp L_while_test_mmx ; back to the loop test
\r
; Exit handlers reached from both decode loops. Each one puts the new mode
; value in edx and jumps to L_update_stream_state, which stores edx to
; [eax+mode_state]. The three error handlers additionally load their message
; label into ecx; the end-of-block handler loads no message in the lines
; visible here.
917 L_invalid_distance_code:
\r
923 mov ecx, invalid_distance_code_msg ; ecx = 'invalid distance code' text
\r
924 mov edx,INFLATE_MODE_BAD ; edx = mode to store
\r
925 jmp L_update_stream_state
\r
927 L_test_for_end_of_block:
\r
; The flag-setting test for this branch is not visible in this excerpt.
934 jz L_invalid_literal_length_code
\r
937 mov edx,INFLATE_MODE_TYPE ; edx = mode to store (no error)
\r
938 jmp L_update_stream_state
\r
940 L_invalid_literal_length_code:
\r
946 mov ecx, invalid_literal_length_code_msg ; ecx = 'invalid literal/length code' text
\r
947 mov edx,INFLATE_MODE_BAD ; edx = mode to store
\r
948 jmp L_update_stream_state
\r
950 L_invalid_distance_too_far:
\r
955 mov ecx, invalid_distance_too_far_msg ; ecx = 'invalid distance too far back' text
\r
956 mov edx,INFLATE_MODE_BAD ; edx = mode to store
\r
957 jmp L_update_stream_state
\r
959 L_update_stream_state:
\r
967 mov [eax+mode_state],edx
\r
973 cmp dword ptr [inflate_fast_use_mmx],2
\r
974 jne L_update_next_in
\r
990 mov [edx+bits_state],ebx
\r
1016 cmp dword ptr [inflate_fast_use_mmx],2
\r
1031 mov [edx+hold_state],ebp
\r
1038 jbe L_last_is_smaller
\r
1044 L_last_is_smaller:
\r
1057 jbe L_end_is_smaller
\r