; (source-browser navigation text "Home / Line# / Scopes# / Navigate# / Raw / Download" was page residue, not part of inffas32.asm)
;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GZIP for the C sources (see inflate.h), this file must be
;   assembled with the matching NO_GUNZIP symbol defined:
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
;
; The value is added to every inflate_state field offset defined further
; down (wsize_state, write_state, ...), so it has to match the zlib version
; this file is linked against.
zlib1222sup      equ    8


; Numeric values stored into state->mode by the exit paths of this file:
;   INFLATE_MODE_TYPE is written when an end-of-block code is decoded,
;   INFLATE_MODE_BAD  is written when an invalid code or distance is found.
; The values 11/26 are used unless NO_GUNZIP is defined without GUNZIP, in
; which case 3/17 are used.
; NOTE(review): these presumably mirror the TYPE and BAD members of the
; inflate_mode enum in inflate.h -- confirm against the zlib version in use.
IFDEF GUNZIP
  INFLATE_MODE_TYPE    equ 11
  INFLATE_MODE_BAD     equ 26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE    equ 11
    INFLATE_MODE_BAD     equ 26
  ELSE
    INFLATE_MODE_TYPE    equ 3
    INFLATE_MODE_BAD     equ 17
  ENDIF
ENDIF


; 75 "inffast.S"
;FILE "inffast.S"

;;;GLOBAL _inflate_fast

;;;SECTION .text


; Target: 32-bit flat model; the instruction set is opened up to Pentium
; (.586p, needed for cpuid) plus MMX (.mmx, needed for the L_*_mmx paths).
	.586p
	.mmx

	name	inflate_fast_x86
	.MODEL	FLAT

_DATA			segment
; Run-time dispatch flag, read and written by L_check_mmx:
;   below 2 (initial value 1) : CPU not probed yet
;   2                         : probe succeeded, use the MMX decode loop
;   3                         : probe failed, use the integer decode loop
inflate_fast_use_mmx:
	dd	1
; Close the data segment explicitly so the code segment that follows is not
; opened while _DATA is still open.
_DATA			ends

_TEXT			segment
PUBLIC _inflate_fast

; Exported entry point.  It only jumps over the constant data below (an
; identification string, the error messages and the bit-mask table) to the
; real prologue at inflate_fast_entry.
ALIGN 4
_inflate_fast:
	jmp inflate_fast_entry



; Identification string embedded in the object code; not referenced by any
; instruction in this file.
ALIGN 4
	db	'Fast decoding Code from Chris Anderson'
	db	0

; Zero-terminated messages; their addresses are stored at offset 24 of the
; stream structure by L_update_stream_state.
ALIGN 4
invalid_literal_length_code_msg:
	db	'invalid literal/length code'
	db	0

ALIGN 4
invalid_distance_code_msg:
	db	'invalid distance code'
	db	0

ALIGN 4
invalid_distance_too_far_msg:
	db	'invalid distance too far back'
	db	0


; inflate_fast_mask[n] = (2^n)-1 for n = 0..32, i.e. a mask selecting the low
; n bits.  The MMX paths index it with the extra-bit count of a code
; (and ecx,[inflate_fast_mask+eax*4]).
ALIGN 4
inflate_fast_mask:
dd	0
dd	1
dd	3
dd	7
dd	15
dd	31
dd	63
dd	127
dd	255
dd	511
dd	1023
dd	2047
dd	4095
dd	8191
dd	16383
dd	32767
dd	65535
dd	131071
dd	262143
dd	524287
dd	1048575
dd	2097151
dd	4194303
dd	8388607
dd	16777215
dd	33554431
dd	67108863
dd	134217727
dd	268435455
dd	536870911
dd	1073741823
dd	2147483647
dd	4294967295


; Byte offsets of the fields of the decoder state that this file touches.
; The state pointer itself is loaded from offset 28 of the stream argument.
; NOTE(review): the offsets are hard-coded and presumably track
; struct inflate_state in inflate.h -- re-check them (and zlib1222sup)
; whenever that structure changes.
mode_state	 equ	0	;/* state->mode	*/
wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */


;;SECTION .text
; 205 "inffast.S"
;GLOBAL	inflate_fast_use_mmx

;SECTION .data


; GLOBAL inflate_fast_use_mmx:object
;.size inflate_fast_use_mmx, 4
; 226 "inffast.S"
;SECTION .text

;-----------------------------------------------------------------------
; inflate_fast_entry -- prologue of _inflate_fast
;
; Stack arguments (after the pushes below and the 64-byte frame):
;   [esp+88] = first argument  (stream structure pointer, "strm")
;   [esp+92] = second argument ("start")
; NOTE(review): strm offsets 0/4/12/16/24/28 are presumably next_in,
; avail_in, next_out, avail_out, msg and state of zlib's z_stream --
; confirm against zlib.h.
;
; Local frame built here:
;   [esp+ 0] (1 << lenbits)  - 1     mask for the first-level length lookup
;   [esp+ 4] (1 << distbits) - 1     mask for the first-level distance lookup
;   [esp+ 8] state->lencode          [esp+12] state->distcode
;   [esp+16] end  = out + strm[16] - 257   (output limit for the fast loop)
;   [esp+20] last = in  + strm[4]  - 11    (input limit for the fast loop)
;   [esp+24] current match length (integer path only)
;   [esp+28] 12-byte scratch copy of the input (used when input is short)
;   [esp+40] beg  = out - (start - strm[16])
;   [esp+44] saved input pointer
;   [esp+48] state->write   [esp+52] state->wsize   [esp+56] state->window
;   [esp+60] out  = strm[12]
;
; Registers on exit from this block:
;   esi = input pointer, ebp = bit accumulator (state->hold),
;   ebx = number of valid bits in ebp (state->bits).
;-----------------------------------------------------------------------
ALIGN 4
inflate_fast_entry:
	push  edi
	push  esi
	push  ebp
	push  ebx
	pushfd				; direction flag is restored by popfd at L_done
	sub  esp,64
	cld				; string instructions must move forward

	mov  esi, [esp+88]		; esi = strm
	mov  edi, [esi+28]		; edi = strm state pointer

	mov  edx, [esi+4]
	mov  eax, [esi+0]

	add  edx,eax
	sub  edx,11			; edx = last

	mov  [esp+44],eax
	mov  [esp+20],edx

	mov  ebp, [esp+92]		; ebp = start
	mov  ecx, [esi+16]
	mov  ebx, [esi+12]

	sub  ebp,ecx
	neg  ebp
	add  ebp,ebx			; ebp = beg = out - (start - strm[16])

	sub  ecx,257
	add  ecx,ebx			; ecx = end

	mov  [esp+60],ebx
	mov  [esp+40],ebp
	mov  [esp+16],ecx
; 285 "inffast.S"
	mov  eax, [edi+lencode_state]
	mov  ecx, [edi+distcode_state]

	mov  [esp+8],eax
	mov  [esp+12],ecx

	mov  eax,1
	mov  ecx, [edi+lenbits_state]
	shl  eax,cl
	dec  eax
	mov  [esp+0],eax		; (1 << lenbits) - 1

	mov  eax,1
	mov  ecx, [edi+distbits_state]
	shl  eax,cl
	dec  eax
	mov  [esp+4],eax		; (1 << distbits) - 1

	mov  eax, [edi+wsize_state]
	mov  ecx, [edi+write_state]
	mov  edx, [edi+window_state]

	mov  [esp+52],eax
	mov  [esp+48],ecx
	mov  [esp+56],edx

	mov  ebp, [edi+hold_state]
	mov  ebx, [edi+bits_state]
; 321 "inffast.S"
	mov  esi, [esp+44]
	mov  ecx, [esp+20]
	cmp  ecx,esi
	ja   L_align_long		; last > in: enough input, decode in place

	; Short input (last <= in): copy the remaining bytes into the 12-byte
	; scratch buffer at [esp+28], zero-pad it, and decode from there.
	add  ecx,11
	sub  ecx,esi			; ecx = number of input bytes available
	mov  eax,12
	sub  eax,ecx			; eax = number of padding bytes
	lea  edi, [esp+28]
	rep movsb
	mov  ecx,eax
	xor  eax,eax
	rep stosb
	lea  esi, [esp+28]
	mov  [esp+20],esi		; last = start of scratch buffer
	jmp  L_is_aligned


; Consume input one byte at a time until esi is 4-byte aligned.  Each byte is
; inserted into the accumulator (ebp) above the ebx bits already held, and
; ebx grows by 8.
L_align_long:
	test  esi,3
	jz   L_is_aligned
	xor  eax,eax
	mov  al, [esi]
	inc  esi
	mov  ecx,ebx			; shift count = bits currently held
	add  ebx,8
	shl  eax,cl
	or  ebp,eax
	jmp L_align_long

; Load the output pointer, then dispatch on inflate_fast_use_mmx:
;   == 2 -> L_init_mmx (MMX loop),  > 2 -> L_do_loop (integer loop),
;   otherwise probe the CPU once, record the answer (2 or 3) and re-dispatch.
L_is_aligned:
	mov  edi, [esp+60]		; edi = output pointer
; 366 "inffast.S"
L_check_mmx:
	cmp  dword ptr [inflate_fast_use_mmx],2
	je   L_init_mmx
	ja   L_do_loop

	; cpuid overwrites eax, ebx, ecx and edx; all four are live here.
	push  eax
	push  ebx
	push  ecx
	push  edx
	; cpuid is available only if bit 21 (0200000h, the ID flag) of EFLAGS
	; can be toggled.
	pushfd
	mov  eax, [esp]
	xor  dword ptr [esp],0200000h

	popfd
	pushfd
	pop  edx
	xor  edx,eax
	jz   L_dont_use_mmx		; flag did not change: no cpuid
	; Leaf 0: vendor string must be "GenuineIntel" ('Genu','ntel','ineI').
	xor  eax,eax
	cpuid
	cmp  ebx,0756e6547h
	jne  L_dont_use_mmx
	cmp  ecx,06c65746eh
	jne  L_dont_use_mmx
	cmp  edx,049656e69h
	jne  L_dont_use_mmx
	; Leaf 1: require family 6 (eax bits 8..11) and the MMX feature bit
	; (edx bit 23, 0800000h).
	mov  eax,1
	cpuid
	shr  eax,8
	and  eax,15
	cmp  eax,6
	jne  L_dont_use_mmx
	test  edx,0800000h
	jnz  L_use_mmx
	jmp  L_dont_use_mmx
L_use_mmx:
	mov  dword ptr [inflate_fast_use_mmx],2
	jmp  L_check_mmx_pop
L_dont_use_mmx:
	mov  dword ptr [inflate_fast_use_mmx],3
L_check_mmx_pop:
	pop  edx
	pop  ecx
	pop  ebx
	pop  eax
	jmp  L_check_mmx		; re-dispatch with the recorded answer
; 426 "inffast.S"
;-----------------------------------------------------------------------
; Integer decode loop.
;   esi = input pointer      edi = output pointer
;   ebp = bit accumulator    bl  = number of valid bits in ebp
; A table entry is one dword: al = operation byte, ah = number of bits the
; code occupies, upper 16 bits = value (literal, base length or base
; distance, or offset of a second-level table).
;-----------------------------------------------------------------------
ALIGN 4
L_do_loop:
; 437 "inffast.S"
	cmp  bl,15
	ja   L_get_length_code		; more than 15 bits held: no refill

	xor  eax,eax
	lodsw				; fetch 16 more input bits
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax

L_get_length_code:
	mov  edx, [esp+0]		; first-level length mask
	mov  ecx, [esp+8]		; length table
	and  edx,ebp
	mov  eax, [ecx+edx*4]

L_dolen:
	mov  cl,ah
	sub  bl,ah
	shr  ebp,cl			; drop the bits of this code

	test  al,al
	jnz   L_test_for_length_base	; non-zero operation: not a literal

	shr  eax,16
	stosb				; operation 0: emit the literal byte

L_while_test:
	; Continue while out < end and in < last.
	cmp  [esp+16],edi
	jbe  L_break_loop

	cmp  [esp+20],esi
	ja   L_do_loop
	jmp  L_break_loop

; Non-literal length/literal code.  edx receives the value field; if bit 4
; (16) of the operation is set it is a base length and the low 4 operation
; bits give the number of extra bits to add.  The final length is saved in
; [esp+24].
L_test_for_length_base:
; 502 "inffast.S"
	mov  edx,eax
	shr  edx,16			; edx = value field (base length)
	mov  cl,al

	test  al,16
	jz   L_test_for_second_level_length
	and  cl,15			; cl = extra-bit count
	jz   L_save_len
	cmp  bl,cl
	jae  L_add_bits_to_len

	; Not enough bits held: refill 16 bits, keeping the count in ch.
	mov  ch,cl
	xor  eax,eax
	lodsw
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax
	mov  cl,ch

L_add_bits_to_len:
	mov  eax,1
	shl  eax,cl
	dec  eax			; eax = (1 << extra) - 1
	sub  bl,cl
	and  eax,ebp
	shr  ebp,cl
	add  edx,eax			; length = base + extra bits

L_save_len:
	mov  [esp+24],edx


; Decode the distance code that follows a length, then copy the match.
; On reaching L_check_window: edx = distance, [esp+24] = length.
L_decode_distance:
; 549 "inffast.S"
	cmp  bl,15
	ja   L_get_distance_code

	xor  eax,eax
	lodsw				; refill 16 bits
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax

L_get_distance_code:
	mov  edx, [esp+4]		; first-level distance mask
	mov  ecx, [esp+12]		; distance table
	and  edx,ebp
	mov  eax, [ecx+edx*4]


L_dodist:
	mov  edx,eax
	shr  edx,16			; edx = value field (base distance)
	mov  cl,ah
	sub  bl,ah
	shr  ebp,cl			; drop the bits of this code
; 584 "inffast.S"
	mov  cl,al

	test  al,16
	jz  L_test_for_second_level_dist
	and  cl,15			; cl = extra-bit count
	jz  L_check_dist_one
	cmp  bl,cl
	jae  L_add_bits_to_dist

	; Not enough bits held: refill 16 bits, keeping the count in ch.
	mov  ch,cl
	xor  eax,eax
	lodsw
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax
	mov  cl,ch

L_add_bits_to_dist:
	mov  eax,1
	shl  eax,cl
	dec  eax
	sub  bl,cl
	and  eax,ebp
	shr  ebp,cl
	add  edx,eax			; distance = base + extra bits
	; falls through to L_check_window (the former jmp to the very next
	; label was redundant)

L_check_window:
; 625 "inffast.S"
	mov  [esp+44],esi		; save input pointer, esi becomes the copy source
	mov  eax,edi
	sub  eax, [esp+40]		; eax = out - beg

	cmp  eax,edx
	jb   L_clip_window		; distance reaches before beg: use the window

	mov  ecx, [esp+24]		; ecx = length
	mov  esi,edi
	sub  esi,edx			; source = out - distance

	; Copy the first three bytes by hand, the remaining length-3 with
	; rep movsb.
	sub  ecx,3
	mov  al, [esi]
	mov  [edi],al
	mov  al, [esi+1]
	mov  dl, [esi+2]
	add  esi,3
	mov  [edi+1],al
	mov  [edi+2],dl
	add  edi,3
	rep movsb

	mov  esi, [esp+44]		; restore input pointer
	jmp  L_while_test

; Distance code without extra bits.  When the distance is exactly 1 and
; output has already been produced (out != beg), the match is a run of the
; previous byte and is written with rep stosb; every other case goes through
; L_check_window.
ALIGN 4
L_check_dist_one:
	cmp  edx,1
	jne  L_check_window
	cmp  [esp+40],edi
	je  L_check_window

	dec  edi
	mov  ecx, [esp+24]		; ecx = length
	mov  al, [edi]			; al = previous output byte
	sub  ecx,3

	mov  [edi+1],al			; first three copies by hand
	mov  [edi+2],al
	mov  [edi+3],al
	add  edi,4
	rep stosb			; remaining length-3 copies

	jmp  L_while_test

; Length code whose operation has bit 4 clear.  Bit 6 (64) set means
; end-of-block or an invalid code; otherwise the entry points into a
; second-level table: index = value + (accumulator & ((1 << cl) - 1)), where
; cl still holds the operation byte loaded in L_test_for_length_base.
ALIGN 4
L_test_for_second_level_length:
	test  al,64
	jnz   L_test_for_end_of_block

	mov  eax,1
	shl  eax,cl
	dec  eax
	and  eax,ebp
	add  eax,edx
	mov  edx, [esp+8]		; length table
	mov  eax, [edx+eax*4]
	jmp  L_dolen

; Distance code whose operation has bit 4 clear.  Bit 6 (64) set means an
; invalid distance code; otherwise the entry points into a second-level
; table: index = value + (accumulator & ((1 << cl) - 1)), where cl still
; holds the operation byte loaded in L_dodist.
ALIGN 4
L_test_for_second_level_dist:
	test  al,64
	jnz   L_invalid_distance_code

	mov  eax,1
	shl  eax,cl
	dec  eax
	and  eax,ebp
	add  eax,edx
	mov  edx, [esp+12]		; distance table
	mov  eax, [edx+eax*4]
	jmp  L_dodist

; The match starts before beg, so (part of) it must come from the sliding
; window.  Entered with eax = out - beg and edx = distance.
; After "add ecx,edx": ecx = distance - (out - beg) = bytes to take from the
; window.  This block handles write == 0; other cases continue at
; L_wrap_around_window.
ALIGN 4
L_clip_window:
; 721 "inffast.S"
	mov  ecx,eax
	mov  eax, [esp+52]		; eax = wsize
	neg  ecx
	mov  esi, [esp+56]		; esi = window

	cmp  eax,edx
	jb   L_invalid_distance_too_far	; distance larger than the window

	add  ecx,edx
	cmp  dword ptr [esp+48],0
	jne  L_wrap_around_window

	sub  eax,ecx
	add  esi,eax			; source = window + wsize - ecx
; 749 "inffast.S"
	mov  eax, [esp+24]		; eax = length
	cmp  eax,ecx
	jbe  L_do_copy1			; whole match lies in the window

	sub  eax,ecx			; eax = bytes left after the window part
	rep movsb
	mov  esi,edi
	sub  esi,edx			; rest comes from out - distance
	jmp  L_do_copy1
	; (an unreachable duplicate of the seven instructions above that
	; followed this jmp has been removed)

; write != 0.  ecx = bytes to take from the window, esi = window.
; If ecx > write the source wraps: first the tail of the window, then up to
; write bytes from its start, then the output buffer itself.
L_wrap_around_window:
; 793 "inffast.S"
	mov  eax, [esp+48]		; eax = write
	cmp  ecx,eax
	jbe  L_contiguous_in_window

	add  esi, [esp+52]
	add  esi,eax
	sub  esi,ecx			; source = window + wsize + write - ecx
	sub  ecx,eax			; ecx = bytes available at the window tail


	mov  eax, [esp+24]		; eax = length
	cmp  eax,ecx
	jbe  L_do_copy1

	sub  eax,ecx
	rep movsb			; copy the window tail
	mov  esi, [esp+56]		; continue at the start of the window
	mov  ecx, [esp+48]
	cmp  eax,ecx
	jbe  L_do_copy1

	sub  eax,ecx
	rep movsb			; copy write bytes from the window start
	mov  esi,edi
	sub  esi,edx			; rest comes from out - distance
	jmp  L_do_copy1

; ecx <= write: the window part is one contiguous run ending at
; window + write.  eax = write, esi = window on entry.
L_contiguous_in_window:
; 836 "inffast.S"
	add  esi,eax
	sub  esi,ecx			; source = window + write - ecx


	mov  eax, [esp+24]		; eax = length
	cmp  eax,ecx
	jbe  L_do_copy1

	sub  eax,ecx
	rep movsb
	mov  esi,edi
	sub  esi,edx			; rest comes from out - distance

; Common tail of every window copy: copy eax bytes from esi, restore the
; input pointer and resume the integer loop.
L_do_copy1:
; 862 "inffast.S"
	mov  ecx,eax
	rep movsb

	mov  esi, [esp+44]
	jmp  L_while_test
; 878 "inffast.S"
;-----------------------------------------------------------------------
; Set up the register roles for the MMX decode loop:
;   mm0 = bit accumulator          ebp = number of valid bits in mm0
;   mm1 = bit count still to be shifted out of mm0 (applied lazily)
;   mm4 = length mask  (working copy)   mm3 = length mask  (master copy)
;   mm5 = distance mask (working copy)  mm2 = distance mask (master copy)
;   ebx = length table pointer
;-----------------------------------------------------------------------
ALIGN 4
L_init_mmx:
	emms

	movd mm0,ebp			; accumulator moves from ebp to mm0
	mov  ebp,ebx			; bit count moves from ebx to ebp
; 896 "inffast.S"
	movd mm4,dword ptr [esp+0]
	movq mm3,mm4
	movd mm5,dword ptr [esp+4]
	movq mm2,mm5
	pxor mm1,mm1			; nothing pending to shift out
	mov  ebx, [esp+8]
	jmp  L_do_loop_mmx

; MMX decode loop: same table format as the integer loop (al = operation,
; ah = code bits, upper 16 bits = value), but the accumulator lives in mm0
; and is refilled 32 bits at a time.
ALIGN 4
L_do_loop_mmx:
	psrlq mm0,mm1			; drop the bits consumed by the previous code

	cmp  ebp,32
	ja  L_get_length_code_mmx	; more than 32 bits held: no refill

	movd mm6,ebp
	movd mm7,dword ptr [esi]	; fetch 32 more input bits
	add  esi,4
	psllq mm7,mm6
	add  ebp,32
	por mm0,mm7

L_get_length_code_mmx:
	pand mm4,mm0
	movd eax,mm4			; eax = accumulator & length mask
	movq mm4,mm3			; reload the working mask
	mov  eax, [ebx+eax*4]

L_dolen_mmx:
	movzx  ecx,ah
	movd mm1,ecx			; bits of this code, shifted out later
	sub  ebp,ecx

	test  al,al
	jnz L_test_for_length_base_mmx	; non-zero operation: not a literal

	shr  eax,16
	stosb				; operation 0: emit the literal byte

L_while_test_mmx:
	; Continue while out < end and in < last.
	cmp  [esp+16],edi
	jbe L_break_loop

	cmp  [esp+20],esi
	ja L_do_loop_mmx
	jmp L_break_loop

; Non-literal length/literal code (MMX path).  edx receives the base length;
; if the operation has bit 4 set, its low 4 bits give the number of extra
; bits, which are taken from mm0 and added to edx.
L_test_for_length_base_mmx:

	mov  edx,eax
	shr  edx,16			; edx = value field (base length)

	test  al,16
	jz  L_test_for_second_level_length_mmx
	and  eax,15			; eax = extra-bit count
	jz L_decode_distance_mmx

	psrlq mm0,mm1			; drop the code bits first
	movd mm1,eax			; extra bits become the pending shift
	movd ecx,mm0
	sub  ebp,eax
	and  ecx, [inflate_fast_mask+eax*4]
	add  edx,ecx			; length = base + extra bits

; Fetch the distance code (MMX path).  edx = match length; ebx is reused as
; the distance table pointer here and reloaded with the length table before
; the loop resumes.
L_decode_distance_mmx:
	psrlq mm0,mm1			; drop the pending bits

	cmp  ebp,32
	ja L_get_dist_code_mmx

	movd mm6,ebp
	movd mm7,dword ptr [esi]	; refill 32 bits
	add  esi,4
	psllq mm7,mm6
	add  ebp,32
	por mm0,mm7

L_get_dist_code_mmx:
	mov  ebx, [esp+12]		; distance table
	pand mm5,mm0
	movd eax,mm5			; eax = accumulator & distance mask
	movq mm5,mm2			; reload the working mask
	mov  eax, [ebx+eax*4]

; Interpret a distance table entry (MMX path).  ebx receives the base
; distance; extra bits, if any, are taken from mm0 and added.  Falls through
; to L_check_window_mmx with ebx = distance and edx = length.
L_dodist_mmx:

	movzx  ecx,ah
	mov  ebx,eax
	shr  ebx,16			; ebx = value field (base distance)
	sub  ebp,ecx
	movd mm1,ecx			; bits of this code, shifted out later

	test  al,16
	jz L_test_for_second_level_dist_mmx
	and  eax,15			; eax = extra-bit count
	jz L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
	psrlq mm0,mm1			; drop the code bits first
	movd mm1,eax			; extra bits become the pending shift
	movd ecx,mm0
	sub  ebp,eax
	and  ecx, [inflate_fast_mask+eax*4]
	add  ebx,ecx			; distance = base + extra bits

; Copy a match (MMX path).  ebx = distance, edx = length.  If the distance
; reaches before beg the window paths are used; otherwise the bytes are
; copied straight from the output buffer.
L_check_window_mmx:
	mov  [esp+44],esi		; save input pointer, esi becomes the copy source
	mov  eax,edi
	sub  eax, [esp+40]		; eax = out - beg

	cmp  eax,ebx
	jb L_clip_window_mmx

	mov  ecx,edx			; ecx = length
	mov  esi,edi
	sub  esi,ebx			; source = out - distance

	; First three bytes by hand, remaining length-3 with rep movsb.
	sub  ecx,3
	mov  al, [esi]
	mov  [edi],al
	mov  al, [esi+1]
	mov  dl, [esi+2]
	add  esi,3
	mov  [edi+1],al
	mov  [edi+2],dl
	add  edi,3
	rep movsb

	mov  esi, [esp+44]		; restore input pointer
	mov  ebx, [esp+8]		; restore length table pointer
	jmp  L_while_test_mmx

; Distance code without extra bits (MMX path).  Distance 1 with output
; already produced (out != beg) is a run of the previous byte, written with
; rep stosb; every other case goes through L_check_window_mmx.
ALIGN 4
L_check_dist_one_mmx:
	cmp  ebx,1
	jne  L_check_window_mmx
	cmp  [esp+40],edi
	je   L_check_window_mmx

	dec  edi
	mov  ecx,edx			; ecx = length
	mov  al, [edi]			; al = previous output byte
	sub  ecx,3

	mov  [edi+1],al			; first three copies by hand
	mov  [edi+2],al
	mov  [edi+3],al
	add  edi,4
	rep stosb			; remaining length-3 copies

	mov  ebx, [esp+8]		; restore length table pointer
	jmp  L_while_test_mmx

; Length code whose operation has bit 4 clear (MMX path).  Bit 6 (64) set
; means end-of-block or an invalid code; otherwise look up the second-level
; entry at index value + (accumulator & inflate_fast_mask[operation & 15]).
ALIGN 4
L_test_for_second_level_length_mmx:
	test  al,64
	jnz L_test_for_end_of_block

	and  eax,15
	psrlq mm0,mm1			; drop the first-level code bits
	movd ecx,mm0
	and  ecx, [inflate_fast_mask+eax*4]
	add  ecx,edx
	mov  eax, [ebx+ecx*4]		; ebx = length table
	jmp L_dolen_mmx

; Distance code whose operation has bit 4 clear (MMX path).  Bit 6 (64) set
; means an invalid distance code; otherwise look up the second-level entry
; at index value + (accumulator & inflate_fast_mask[operation & 15]).
ALIGN 4
L_test_for_second_level_dist_mmx:
	test  al,64
	jnz L_invalid_distance_code

	and  eax,15
	psrlq mm0,mm1			; drop the first-level code bits
	movd ecx,mm0
	and  ecx, [inflate_fast_mask+eax*4]
	mov  eax, [esp+12]		; distance table
	add  ecx,ebx
	mov  eax, [eax+ecx*4]
	jmp  L_dodist_mmx

; MMX-path counterpart of L_clip_window.  Entered with eax = out - beg,
; ebx = distance, edx = length.  After "add ecx,ebx":
; ecx = distance - (out - beg) = bytes to take from the window.
; This block handles write == 0; other cases continue at
; L_wrap_around_window_mmx.
ALIGN 4
L_clip_window_mmx:

	mov  ecx,eax
	mov  eax, [esp+52]		; eax = wsize
	neg  ecx
	mov  esi, [esp+56]		; esi = window

	cmp  eax,ebx
	jb  L_invalid_distance_too_far	; distance larger than the window

	add  ecx,ebx
	cmp  dword ptr [esp+48],0
	jne  L_wrap_around_window_mmx

	sub  eax,ecx
	add  esi,eax			; source = window + wsize - ecx

	cmp  edx,ecx
	jbe  L_do_copy1_mmx		; whole match lies in the window

	sub  edx,ecx			; edx = bytes left after the window part
	rep movsb
	mov  esi,edi
	sub  esi,ebx			; rest comes from out - distance
	jmp  L_do_copy1_mmx
	; (an unreachable duplicate of the seven instructions above that
	; followed this jmp has been removed)

; write != 0 (MMX path).  ecx = bytes to take from the window, esi = window,
; edx = length, ebx = distance.  If ecx > write the source wraps: window
; tail first, then up to write bytes from the window start, then the output
; buffer itself.
L_wrap_around_window_mmx:

	mov  eax, [esp+48]		; eax = write
	cmp  ecx,eax
	jbe  L_contiguous_in_window_mmx

	add  esi, [esp+52]
	add  esi,eax
	sub  esi,ecx			; source = window + wsize + write - ecx
	sub  ecx,eax			; ecx = bytes available at the window tail


	cmp  edx,ecx
	jbe  L_do_copy1_mmx

	sub  edx,ecx
	rep movsb			; copy the window tail
	mov  esi, [esp+56]		; continue at the start of the window
	mov  ecx, [esp+48]
	cmp  edx,ecx
	jbe  L_do_copy1_mmx

	sub  edx,ecx
	rep movsb			; copy write bytes from the window start
	mov  esi,edi
	sub  esi,ebx			; rest comes from out - distance
	jmp  L_do_copy1_mmx

; ecx <= write (MMX path): the window part is one contiguous run ending at
; window + write.  eax = write, esi = window on entry.
L_contiguous_in_window_mmx:

	add  esi,eax
	sub  esi,ecx			; source = window + write - ecx


	cmp  edx,ecx
	jbe  L_do_copy1_mmx

	sub  edx,ecx
	rep movsb
	mov  esi,edi
	sub  esi,ebx			; rest comes from out - distance

; Common tail of every MMX-path window copy: copy edx bytes from esi,
; restore the input pointer and the length table pointer, resume the loop.
L_do_copy1_mmx:


	mov  ecx,edx
	rep movsb

	mov  esi, [esp+44]
	mov  ebx, [esp+8]
	jmp  L_while_test_mmx
; 1174 "inffast.S"
; Exit paths shared by the integer and MMX loops.  Each one loads
;   ecx = address of an error message, or 0 for "no message"
;   edx = new value for state->mode
; and ends in L_update_stream_state, which stores them and leaves the loop.
L_invalid_distance_code:

	mov  ecx, invalid_distance_code_msg
	mov  edx,INFLATE_MODE_BAD
	jmp  L_update_stream_state

L_test_for_end_of_block:

	; Bit 5 (32) of the operation marks end-of-block; anything else that
	; got here is an invalid literal/length code.
	test  al,32
	jz  L_invalid_literal_length_code

	xor  ecx,ecx			; no message (flags are re-tested below)
	mov  edx,INFLATE_MODE_TYPE
	jmp  L_update_stream_state

L_invalid_literal_length_code:

	mov  ecx, invalid_literal_length_code_msg
	mov  edx,INFLATE_MODE_BAD
	jmp  L_update_stream_state

L_invalid_distance_too_far:

	; Reached from the clip-window code, where esi had been replaced by
	; the window pointer; restore the saved input pointer first.
	mov  esi, [esp+44]
	mov  ecx, invalid_distance_too_far_msg
	mov  edx,INFLATE_MODE_BAD
	; falls through

L_update_stream_state:

	mov  eax, [esp+88]		; eax = strm
	test  ecx,ecx
	jz  L_skip_msg
	mov  [eax+24],ecx		; store the message pointer at strm+24
L_skip_msg:
	mov  eax, [eax+28]		; eax = strm state pointer
	mov  [eax+mode_state],edx
	jmp  L_break_loop

;-----------------------------------------------------------------------
; Loop exit: write the decoder position back and return.
;   - whole unused bytes still held in the accumulator are handed back to
;     the input (esi -= bits/8, bits &= 7)
;   - strm+12 <- output pointer, state->bits / state->hold <- leftover bits
;   - strm+0  <- input pointer (translated back if the scratch buffer was
;     in use)
;   - strm+4  <- last - in  + 11,  strm+16 <- end - out + 257
; On the MMX path the bit count is in ebp and the accumulator in mm0; both
; are moved back to ebx / ebp here.
;-----------------------------------------------------------------------
ALIGN 4
L_break_loop:
; 1243 "inffast.S"
	cmp  dword ptr [inflate_fast_use_mmx],2
	jne  L_update_next_in

	mov  ebx,ebp			; MMX path: bit count lives in ebp

L_update_next_in:
; 1266 "inffast.S"
	mov  eax, [esp+88]		; eax = strm
	mov  ecx,ebx
	mov  edx, [eax+28]		; edx = strm state pointer
	shr  ecx,3			; ecx = whole bytes still in the accumulator
	sub  esi,ecx			; give them back to the input
	shl  ecx,3
	sub  ebx,ecx			; ebx = leftover bit count (0..7)
	mov  [eax+12],edi
	mov  [edx+bits_state],ebx
	mov  ecx,ebx

	lea  ebx, [esp+28]
	cmp  [esp+20],ebx
	jne  L_buf_not_used		; last != scratch buffer: decoded in place

	; Input was decoded from the scratch buffer: convert esi back to an
	; address in the caller's buffer and recompute last from strm.
	sub  esi,ebx
	mov  ebx, [eax+0]
	mov  [esp+20],ebx
	add  esi,ebx
	mov  ebx, [eax+4]
	sub  ebx,11
	add  [esp+20],ebx

L_buf_not_used:
	mov  [eax+0],esi

	mov  ebx,1
	shl  ebx,cl
	dec  ebx			; ebx = (1 << leftover bits) - 1

	cmp  dword ptr [inflate_fast_use_mmx],2
	jne  L_update_hold

	psrlq mm0,mm1			; apply the pending shift
	movd ebp,mm0			; accumulator back into ebp

	emms				; leave MMX state clean for the caller

L_update_hold:
	and  ebp,ebx			; keep only the leftover bits
	mov  [edx+hold_state],ebp

	; strm+4 = last - in + 11; both branches yield the same 32-bit value,
	; they only differ in the order of the subtraction.
	mov  ebx, [esp+20]
	cmp  ebx,esi
	jbe  L_last_is_smaller

	sub  ebx,esi
	add  ebx,11
	mov  [eax+4],ebx
	jmp  L_fixup_out
L_last_is_smaller:
	sub  esi,ebx
	neg  esi
	add  esi,11
	mov  [eax+4],esi

L_fixup_out:
	; strm+16 = end - out + 257, computed the same two ways.
	mov  ebx, [esp+16]
	cmp  ebx,edi
	jbe  L_end_is_smaller

	sub  ebx,edi
	add  ebx,257
	mov  [eax+16],ebx
	jmp  L_done
L_end_is_smaller:
	sub  edi,ebx
	neg  edi
	add  edi,257
	mov  [eax+16],edi

L_done:
	; Undo the prologue: frame, flags, then registers in reverse order.
	add  esp,64
	popfd
	pop  ebx
	pop  ebp
	pop  esi
	pop  edi
	ret

; End of the code segment and of the module.
_TEXT	ends
end
