• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;
2; x86 format converters for HERMES
3; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
4; This source code is licensed under the GNU LGPL
5;
6; Please refer to the file COPYING.LIB contained in the distribution for
7; licensing conditions
8;
9; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
10;
11
12BITS 32
13
14%include "common.inc"
15
16SDL_FUNC _ConvertX86p32_32BGR888
17SDL_FUNC _ConvertX86p32_32RGBA888
18SDL_FUNC _ConvertX86p32_32BGRA888
19SDL_FUNC _ConvertX86p32_24RGB888
20SDL_FUNC _ConvertX86p32_24BGR888
21SDL_FUNC _ConvertX86p32_16RGB565
22SDL_FUNC _ConvertX86p32_16BGR565
23SDL_FUNC _ConvertX86p32_16RGB555
24SDL_FUNC _ConvertX86p32_16BGR555
25SDL_FUNC _ConvertX86p32_8RGB332
26
27SECTION .text
28
;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount in pixels (must NOT be 0; the _ConvertX86 routine checks for that)
;; Destroys:
;;   EAX, EBX, EDX
;;   (ESI and EDI are advanced past the converted pixels and ECX is consumed)
36
37
;; _ConvertX86p32_32BGR888
;; Copies ECX 32-bit pixels from [ESI] to [EDI], exchanging byte 0 and
;; byte 2 of every dword; bytes 1 and 3 stay where they are.
;; In:          ESI = source, EDI = dest, ECX = pixel count
;;              (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
_ConvertX86p32_32BGR888:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 32 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 32
        ja      .unrolled_setup

.short_loop:
        mov     edx, [esi]              ; edx = [b3][b2][b1][b0]
        bswap   edx                     ; edx = [b0][b1][b2][b3]
        ror     edx, 8                  ; edx = [b3][b0][b1][b2]
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.unrolled_setup:
        push    ebp                     ; EBP serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

.unrolled_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        bswap   eax

        bswap   ebx

        ror     eax, 8
        mov     ecx, [esi+8]

        ror     ebx, 8
        mov     edx, [esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx, 8
        mov     [edi+0], eax

        ror     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .unrolled_loop

        ; 0..3 pixels are left over after the 4-pixel blocks
        pop     ecx
        and     ecx, BYTE 11b
        jz      .done

.tail_loop:
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
115
116
117
118
;; _ConvertX86p32_32RGBA888
;; Copies ECX 32-bit pixels from [ESI] to [EDI], rotating every dword left
;; by 8 bits: the top byte moves to the bottom, the other three move up.
;; In:          ESI = source, EDI = dest, ECX = pixel count
;;              (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
_ConvertX86p32_32RGBA888:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 32 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 32
        ja      .unrolled_setup

.short_loop:
        mov     edx, [esi]              ; edx = [b3][b2][b1][b0]
        rol     edx, 8                  ; edx = [b2][b1][b0][b3]
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.unrolled_setup:
        push    ebp                     ; EBP serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

.unrolled_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        rol     eax, 8
        mov     ecx, [esi+8]

        rol     ebx, 8
        mov     edx, [esi+12]

        rol     ecx, 8
        mov     [edi+0], eax

        rol     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .unrolled_loop

        ; 0..3 pixels are left over after the 4-pixel blocks
        pop     ecx
        and     ecx, BYTE 11b
        jz      .done

.tail_loop:
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
186
187
188
189
;; _ConvertX86p32_32BGRA888
;; Copies ECX 32-bit pixels from [ESI] to [EDI], reversing the order of the
;; four bytes of every dword.
;; In:          ESI = source, EDI = dest, ECX = pixel count
;;              (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
_ConvertX86p32_32BGRA888:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 32 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 32
        ja      .unrolled_setup

.short_loop:
        mov     edx, [esi]              ; edx = [b3][b2][b1][b0]
        bswap   edx                     ; edx = [b0][b1][b2][b3]
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.unrolled_setup:
        push    ebp                     ; EBP serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

.unrolled_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi+0], eax
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .unrolled_loop

        ; 0..3 pixels are left over after the 4-pixel blocks
        pop     ecx
        and     ecx, BYTE 11b
        jz      .done

.tail_loop:
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
259
260
261
262
;; 32 bit RGB 888 to 24 bit RGB 888
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (3 bytes per pixel: blue, green, red)
;;              ECX = pixel count (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
_ConvertX86p32_24RGB888:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 32 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 32
        ja      .align_head

.short_loop:
        mov     al, [esi]               ; blue
        mov     bl, [esi+1]             ; green
        mov     dl, [esi+2]             ; red
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.align_head:
        ; convert single pixels until EDI is a multiple of 4
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .unrolled_setup
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled_setup:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

        ; Each pass turns 4 source pixels (16 bytes) into 3 dest dwords
        ; (12 bytes).  Register contents are shown [msb]..[lsb]; the digit is
        ; the pixel index inside the block.
.unrolled_loop:
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ; eax = [R0][G0][B0][ 0]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        shl     ebx, 8                  ; ebx = [R1][G1][B1][ 0]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]

        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  (dword 0 done)
        mov     bh, [esi+8+1]           ; ebx = [R1][G1][G2][ 0]

        mov     [edi], eax
        add     edi, BYTE 3*4

        shl     ecx, 8                  ; ecx = [R3][G3][B3][ 0]
        mov     bl, [esi+8+0]           ; ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  (dword 1 done)
        mov     cl, [esi+8+2]           ; ecx = [R3][G3][B3][R2]  (dword 2 done)

        mov     [edi+4-3*4], ebx        ; EDI was already advanced above
        add     esi, BYTE 4*4

        mov     [edi+8-3*4], ecx
        dec     ebp

        jnz     .unrolled_loop

        ; 0..3 pixels are left over after the 4-pixel blocks
        pop     ecx
        and     ecx, BYTE 11b
        jz      .done

.tail_loop:
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
356
357
358
359
;; 32 bit RGB 888 to 24 bit BGR 888
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (3 bytes per pixel: red, green, blue)
;;              ECX = pixel count (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
_ConvertX86p32_24BGR888:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 32 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 32
        ja      .align_head

.short_loop:
        mov     dl, [esi]               ; blue
        mov     bl, [esi+1]             ; green
        mov     al, [esi+2]             ; red
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.align_head:
        ; convert single pixels until EDI is a multiple of 4
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .unrolled_setup
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled_setup:
        push    ebp                     ; EBP serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

        ; Each pass turns 4 source pixels (16 bytes) into 3 dest dwords
        ; (12 bytes).  Register contents are shown [msb]..[lsb]; the digit is
        ; the pixel index inside the block.
.unrolled_loop:
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        bswap   eax                     ; eax = [B0][G0][R0][A0]

        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al, [esi+4+2]           ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+4+4+1]         ; ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  (dword 0 done)
        mov     bl, [esi+4+4+2]         ; ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  (dword 1 done)
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        bswap   ecx                     ; ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  (dword 2 done)
        add     esi, BYTE 4*4

        mov     [edi+8], ecx
        add     edi, BYTE 3*4

        dec     ebp
        jnz     .unrolled_loop

        ; 0..3 pixels are left over after the 4-pixel blocks
        pop     ecx
        and     ecx, BYTE 11b
        jz      .done

.tail_loop:
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
456
457
458
459
;; 32 bit RGB 888 to 16 bit RGB 565
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (one 16-bit word per pixel: rrrrrggg gggbbbbb)
;;              ECX = pixel count (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags
_ConvertX86p32_16RGB565:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 16 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 16
        ja      .align_head

.short_loop:
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000rrrrr
        and     al, 11111100b           ; al = gggggg00
        shl     eax, 3                  ; ax = rrrrrggg ggg00000
        shr     bl, 3                   ; bl = 000bbbbb
        add     al, bl                  ; ax = rrrrrggg gggbbbbb
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop

.done_short:
        retn


.align_head:
        ; if EDI is not a multiple of 4, convert one pixel on its own first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at the end of the paired region ...
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; ... and index them with a counter running from -pairs up to 0
        neg     ecx
        jmp     SHORT .pair_convert

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_convert:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair

        shr     ah, 2                   ; green0 reduced to 6 bits
        mov     ebx, [esi+ecx*8+4]      ; second pixel (green1 / blue1)

        shr     eax, 3                  ; bits 10..0 = green0:blue0
        mov     edx, [esi+ecx*8+4]      ; second pixel again (red1)

        shr     bh, 2                   ; green1 reduced to 6 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red0

        shl     ebx, 13                 ; bits 26..16 = green1:blue1
        and     eax, 000007FFh

        shl     edx, 8                  ; red1 -> bits 31..24, red0 -> bits 15..8
        and     ebx, 07FF0000h

        and     edx, 0F800F800h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = [pixel 1][pixel 0]
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx = 0 here: store the final pair

        ; one pixel is left over when the saved count was odd
        pop     ecx
        test    cl, 1
        jz      .done

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
571
572
573
574
;; 32 bit RGB 888 to 16 bit BGR 565
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (one 16-bit word per pixel: bbbbbggg gggrrrrr)
;;              ECX = pixel count (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags
_ConvertX86p32_16BGR565:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 16 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 16
        ja      .align_head

.short_loop:
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000bbbbb
        and     al, 11111100b           ; al = gggggg00
        shl     eax, 3                  ; ax = bbbbbggg ggg00000
        shr     bl, 3                   ; bl = 000rrrrr
        add     al, bl                  ; ax = bbbbbggg gggrrrrr
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.align_head:
        ; if EDI is not a multiple of 4, convert one pixel on its own first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at the end of the paired region ...
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; ... and index them with a counter running from -pairs up to 0
        neg     ecx
        jmp     SHORT .pair_convert

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_convert:
        mov     edx, [esi+ecx*8+4]      ; second pixel (red1 ends up in bits 23..16)

        mov     bh, [esi+ecx*8+4]       ; bh = blue1
        mov     ah, [esi+ecx*8]         ; ah = blue0

        shr     bh, 3                   ; blue1 reduced to 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green0

        shr     ah, 3                   ; blue0 reduced to 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green1

        shl     eax, 3                  ; ax = bbbbbggg ggggg000 (pixel 0)
        mov     dl, [esi+ecx*8+2]       ; dl = red0

        shl     ebx, 19                 ; blue1:green1 -> bits 31..19
        and     eax, 0000FFE0h          ; blue0 + top 6 bits of green0

        shr     edx, 3                  ; red0 -> bits 4..0, red1 -> bits 20..16
        and     ebx, 0FFE00000h         ; blue1 + top 6 bits of green1

        and     edx, 001F001Fh
        add     eax, ebx

        add     eax, edx                ; eax = [pixel 1][pixel 0]
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx = 0 here: store the final pair

        ; one pixel is left over when the saved count was odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
685
686
687
688
;; 32 bit RGB 888 to 16 bit RGB 555
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (one 16-bit word per pixel: 0rrrrrgg gggbbbbb)
;;              ECX = pixel count (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags
_ConvertX86p32_16RGB555:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 16 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 16
        ja      .align_head

.short_loop:
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000rrrrr
        and     al, 11111000b           ; al = ggggg000
        shl     eax, 2                  ; ax = 0rrrrrgg ggg00000
        shr     bl, 3                   ; bl = 000bbbbb
        add     al, bl                  ; ax = 0rrrrrgg gggbbbbb
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.align_head:
        ; if EDI is not a multiple of 4, convert one pixel on its own first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at the end of the paired region ...
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; ... and index them with a counter running from -pairs up to 0
        neg     ecx
        jmp     SHORT .pair_convert

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_convert:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair

        shr     ah, 3                   ; green0 reduced to 5 bits
        mov     ebx, [esi+ecx*8+4]      ; second pixel (green1 / blue1)

        shr     eax, 3                  ; bits 9..0 = green0:blue0
        mov     edx, [esi+ecx*8+4]      ; second pixel again (red1)

        shr     bh, 3                   ; green1 reduced to 5 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red0

        shl     ebx, 13                 ; bits 25..16 = green1:blue1
        and     eax, 000007FFh

        shl     edx, 7                  ; red1 -> bits 30..23, red0 -> bits 14..7
        and     ebx, 07FF0000h

        and     edx, 07C007C00h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = [pixel 1][pixel 0]
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx = 0 here: store the final pair

        ; one pixel is left over when the saved count was odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .done
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
796
797
798
799
;; 32 bit RGB 888 to 16 bit BGR 555
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (one 16-bit word per pixel: 0bbbbbgg gggrrrrr)
;;              ECX = pixel count (a count of 0 returns immediately)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags
_ConvertX86p32_16BGR555:

        ; a zero count would wrap the dec/jnz counter of the short loop
        test    ecx, ecx
        jz      .done_short

        ; 16 pixels or fewer: one pixel per iteration
        cmp     ecx, BYTE 16
        ja      .align_head


.short_loop:
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000bbbbb
        and     al, 11111000b           ; al = ggggg000
        shl     eax, 2                  ; ax = 0bbbbbgg ggg00000
        shr     bl, 3                   ; bl = 000rrrrr
        add     al, bl                  ; ax = 0bbbbbgg gggrrrrr
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
.done_short:
        retn

.align_head:
        ; if EDI is not a multiple of 4, convert one pixel on its own first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at the end of the paired region ...
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; ... and index them with a counter running from -pairs up to 0
        neg     ecx
        jmp     SHORT .pair_convert

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_convert:
        mov     edx, [esi+ecx*8+4]      ; second pixel (red1 ends up in bits 23..16)

        mov     bh, [esi+ecx*8+4]       ; bh = blue1
        mov     ah, [esi+ecx*8]         ; ah = blue0

        shr     bh, 3                   ; blue1 reduced to 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green0

        shr     ah, 3                   ; blue0 reduced to 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green1

        shl     eax, 2                  ; ax = 0bbbbbgg gggggg00 (pixel 0)
        mov     dl, [esi+ecx*8+2]       ; dl = red0

        shl     ebx, 18                 ; blue1:green1 -> bits 30..18
        and     eax, 00007FE0h          ; blue0 + top 5 bits of green0

        shr     edx, 3                  ; red0 -> bits 4..0, red1 -> bits 20..16
        and     ebx, 07FE00000h         ; blue1 + top 5 bits of green1

        and     edx, 001F001Fh
        add     eax, ebx

        add     eax, edx                ; eax = [pixel 1][pixel 0]
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx = 0 here: store the final pair

        ; one pixel is left over when the saved count was odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
911
912
913
914
915
;; 32 bit RGB 888 to 8 bit RGB 332 (rrrgggbb)
;;
;; Packs four pixels into one dword per pass of the main loop, then converts
;; the remaining 0..3 pixels one byte at a time.
;;
;; In:          ESI = source (4 bytes per pixel: blue, green, red, unused)
;;              EDI = dest   (1 byte per pixel: rrrgggbb)
;;              ECX = pixel count (0 falls straight through to the return)
;; Out:         ESI and EDI advanced past the converted pixels
;; May destroy: EAX, EBX, ECX, EDX, flags
_ConvertX86p32_8RGB332:

        push    ecx                     ; keep the full count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      NEAR .quads_done        ; target is out of short-jump range

.quad_loop:
        mov     eax, [esi]              ; pixel 0
        mov     edx, [esi+4]            ; pixel 1

        shr     dl, 6                   ; dl = top 2 bits of blue1
        mov     ebx, eax

        shr     al, 6                   ; al = top 2 bits of blue0
        and     ah, 0e0h                ; ah = top 3 bits of green0

        shr     ebx, 16                 ; bl = red0
        and     dh, 0e0h                ; dh = top 3 bits of green1

        shr     ah, 3                   ; green0 -> bits 4..2
        and     bl, 0e0h                ; top 3 bits of red0 stay in bits 7..5

        shr     dh, 3                   ; green1 -> bits 4..2

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = packed pixel 0

        shr     ebx, 16                 ; bl = red1
        or      dl, dh

        and     bl, 0e0h

        or      dl, bl                  ; dl = packed pixel 1

        mov     ah, dl                  ; ax = [pixel 1][pixel 0]



        mov     ebx, [esi+8]            ; pixel 2

        mov     edx, ebx
        and     bh, 0e0h                ; top 3 bits of green2

        shr     bl, 6                   ; top 2 bits of blue2
        and     edx, 0e00000h           ; top 3 bits of red2 (still in bits 23..21)

        shr     edx, 16                 ; dl = red2 in bits 7..5

        shr     bh, 3                   ; green2 -> bits 4..2

        ror     eax, 16                 ; park pixels 0/1 in the upper half of eax
        or      bl, dl

        mov     edx, [esi+12]           ; pixel 3
        or      bl, bh                  ; bl = packed pixel 2

        mov     al, bl

        mov     ebx, edx
        and     dh, 0e0h                ; top 3 bits of green3

        shr     dl, 6                   ; top 2 bits of blue3
        and     ebx, 0e00000h           ; top 3 bits of red3 (still in bits 23..21)

        shr     dh, 3                   ; green3 -> bits 4..2
        mov     ah, dl

        shr     ebx, 16                 ; bl = red3 in bits 7..5
        or      ah, dh

        or      ah, bl                  ; ah = packed pixel 3

        rol     eax, 16                 ; eax = [pixel 3][pixel 2][pixel 1][pixel 0]
        add     esi, BYTE 16

        mov     [edi], eax
        add     edi, BYTE 4

        dec     ecx
        jnz     NEAR .quad_loop         ; loop body is longer than a short jump reaches

.quads_done:

        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels are left over

        jz      .done                   ; nothing to do anymore

.tail_loop:
        mov     eax, [esi]              ; single pixel conversion for trailing pixels

        mov     ebx, eax

        shr     al, 6                   ; top 2 bits of blue
        and     ah, 0e0h                ; top 3 bits of green

        shr     ebx, 16                 ; bl = red

        shr     ah, 3                   ; green -> bits 4..2
        and     bl, 0e0h                ; top 3 bits of red stay in bits 7..5

        or      al, ah
        or      al, bl                  ; al = rrrgggbb

        mov     [edi], al

        inc     edi
        add     esi, BYTE 4

        dec     ecx
        jnz     .tail_loop

.done:
        retn
1041
1042%ifidn __OUTPUT_FORMAT__,elf32
1043section .note.GNU-stack noalloc noexec nowrite progbits
1044%endif
1045