;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;

; Assemble as 32-bit protected-mode code.
BITS 32

%include "common.inc"

; One SDL_FUNC line per converter implemented below.
; NOTE(review): SDL_FUNC is not defined in this file; presumably it is a macro
; from common.inc that exports the symbol -- confirm there.
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332

SECTION .text

;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX
;;   (ECX is consumed as the pixel counter; ESI and EDI are left pointing
;;    just past the pixels that were converted)


;; 32 bit pixels to 32 bit BGR 888: bytes 0 and 2 of every dword are swapped,
;; byte 3 is left where it is.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI and EDI advanced by 4 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; EBP is pushed and restored on the unrolled path
;;
;; Register diagrams list the four bytes of a dword, high byte first.
_ConvertX86p32_32BGR888:

    ; 32 pixels or fewer: not worth unrolling, use the one-pixel loop
    cmp ecx,BYTE 32
    ja .unrolled

.short_loop:
    mov edx,[esi]               ; edx = [A][R][G][B]
    bswap edx                   ; edx = [B][G][R][A]
    ror edx,8                   ; edx = [A][B][G][R]
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .short_loop
    retn

.unrolled:
    ; ebp becomes the block counter, so preserve the caller's value
    push ebp

    ; four pixels per iteration
    mov ebp,ecx
    shr ebp,2

    ; ecx is reused as a data register below; keep the count for the tail
    push ecx

    ; The instruction order below interleaves four independent pixels and is
    ; kept exactly as originally scheduled.
.block_loop:
        mov eax,[esi]
        mov ebx,[esi+4]

        bswap eax

        bswap ebx

        ror eax,8
        mov ecx,[esi+8]

        ror ebx,8
        mov edx,[esi+12]

        bswap ecx

        bswap edx

        ror ecx,8
        mov [edi+0],eax

        ror edx,8
        mov [edi+4],ebx

        mov [edi+8],ecx
        mov [edi+12],edx

        add esi,BYTE 16
        add edi,BYTE 16

        dec ebp
        jnz .block_loop

    ; 0..3 pixels are left over after the 4-pixel blocks
    pop ecx
    and ecx,BYTE 11b
    jz .done

.tail_loop:
    mov edx,[esi]
    bswap edx
    ror edx,8
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .tail_loop

.done:
    pop ebp
    retn



;; 32 bit pixels to 32 bit RGBA 888: every dword is rotated left by one byte,
;; so the top byte ends up in the lowest position.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI and EDI advanced by 4 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; EBP is pushed and restored on the unrolled path
;;
;; Register diagrams list the four bytes of a dword, high byte first.
_ConvertX86p32_32RGBA888:

    ; 32 pixels or fewer: not worth unrolling, use the one-pixel loop
    cmp ecx,BYTE 32
    ja .unrolled

.short_loop:
    mov edx,[esi]               ; edx = [A][R][G][B]
    rol edx,8                   ; edx = [R][G][B][A]
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .short_loop
    retn

.unrolled:
    ; ebp becomes the block counter, so preserve the caller's value
    push ebp

    ; four pixels per iteration
    mov ebp,ecx
    shr ebp,2

    ; ecx is reused as a data register below; keep the count for the tail
    push ecx

    ; Loads, rotates and stores of four independent pixels are interleaved;
    ; the order is kept exactly as originally scheduled.
.block_loop:
        mov eax,[esi]
        mov ebx,[esi+4]

        rol eax,8
        mov ecx,[esi+8]

        rol ebx,8
        mov edx,[esi+12]

        rol ecx,8
        mov [edi+0],eax

        rol edx,8
        mov [edi+4],ebx

        mov [edi+8],ecx
        mov [edi+12],edx

        add esi,BYTE 16
        add edi,BYTE 16

        dec ebp
        jnz .block_loop

    ; 0..3 pixels are left over after the 4-pixel blocks
    pop ecx
    and ecx,BYTE 11b
    jz .done

.tail_loop:
    mov edx,[esi]
    rol edx,8
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .tail_loop

.done:
    pop ebp
    retn



;; 32 bit pixels to 32 bit BGRA 888: the byte order of every dword is reversed.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI and EDI advanced by 4 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; EBP is pushed and restored on the unrolled path
;;
;; Register diagrams list the four bytes of a dword, high byte first.
_ConvertX86p32_32BGRA888:

    ; 32 pixels or fewer: not worth unrolling, use the one-pixel loop
    cmp ecx,BYTE 32
    ja .unrolled

.short_loop:
    mov edx,[esi]               ; edx = [A][R][G][B]
    bswap edx                   ; edx = [B][G][R][A]
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .short_loop
    retn

.unrolled:
    ; ebp becomes the block counter, so preserve the caller's value
    push ebp

    ; four pixels per iteration
    mov ebp,ecx
    shr ebp,2

    ; ecx is reused as a data register below; keep the count for the tail
    push ecx

.block_loop:
        mov eax,[esi]
        mov ebx,[esi+4]

        mov ecx,[esi+8]
        mov edx,[esi+12]

        bswap eax

        bswap ebx

        bswap ecx

        bswap edx

        mov [edi+0],eax
        mov [edi+4],ebx

        mov [edi+8],ecx
        mov [edi+12],edx

        add esi,BYTE 16
        add edi,BYTE 16

        dec ebp
        jnz .block_loop

    ; 0..3 pixels are left over after the 4-pixel blocks
    pop ecx
    and ecx,BYTE 11b
    jz .done

.tail_loop:
    mov edx,[esi]
    bswap edx
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .tail_loop

.done:
    pop ebp
    retn



;; 32 bit RGB 888 to 24 BIT RGB 888
;;
;; Copies the low three bytes of every source dword and drops the fourth.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 3 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; EBP is pushed and restored on the unrolled path
;;
;; Register diagrams list the four bytes of a dword, high byte first; the
;; digit is the index of the pixel inside the current group of four.

_ConvertX86p32_24RGB888:

    ; 32 pixels or fewer: use the byte-wise loop
    cmp ecx,BYTE 32
    ja .align_head

.short_loop:
    mov al,[esi]
    mov bl,[esi+1]
    mov dl,[esi+2]
    mov [edi],al
    mov [edi+1],bl
    mov [edi+2],dl
    add esi,BYTE 4
    add edi,BYTE 3
    dec ecx
    jnz .short_loop
    retn

.align_head:
    ; Convert single pixels until edi is a multiple of 4, so that the dword
    ; stores in the unrolled loop are aligned.
    mov edx,edi
    and edx,BYTE 11b
    jz .unrolled
    mov al,[esi]
    mov bl,[esi+1]
    mov dl,[esi+2]
    mov [edi],al
    mov [edi+1],bl
    mov [edi+2],dl
    add esi,BYTE 4
    add edi,BYTE 3
    dec ecx
    jmp SHORT .align_head

.unrolled:
    ; four source pixels (16 bytes) become three dwords (12 bytes) per pass
    push ebp
    mov ebp,ecx
    shr ebp,2

    ; ecx is reused as a data register below; keep the count for the tail
    push ecx

.block_loop:
        mov eax,[esi]                   ; pixel 0                eax = [A0][R0][G0][B0]
        mov ebx,[esi+4]                 ; pixel 1                ebx = [A1][R1][G1][B1]

        shl eax,8                       ;                        eax = [R0][G0][B0][..]
        mov ecx,[esi+12]                ; pixel 3                ecx = [A3][R3][G3][B3]

        shl ebx,8                       ;                        ebx = [R1][G1][B1][..]
        mov al,[esi+4]                  ;                        eax = [R0][G0][B0][B1]

        ror eax,8                       ;                        eax = [B1][R0][G0][B0] (done)
        mov bh,[esi+8+1]                ;                        ebx = [R1][G1][G2][..]

        mov [edi],eax
        add edi,BYTE 3*4

        shl ecx,8                       ;                        ecx = [R3][G3][B3][..]
        mov bl,[esi+8+0]                ;                        ebx = [R1][G1][G2][B2]

        rol ebx,16                      ;                        ebx = [G2][B2][R1][G1] (done)
        mov cl,[esi+8+2]                ;                        ecx = [R3][G3][B3][R2] (done)

        ; edi was already advanced above, hence the -3*4 in the offsets
        mov [edi+4-3*4],ebx
        add esi,BYTE 4*4

        mov [edi+8-3*4],ecx
        dec ebp

        jnz .block_loop

    ; 0..3 pixels are left over after the 4-pixel blocks
    pop ecx
    and ecx,BYTE 11b
    jz .done

.tail_loop:
    mov al,[esi]
    mov bl,[esi+1]
    mov dl,[esi+2]
    mov [edi],al
    mov [edi+1],bl
    mov [edi+2],dl
    add esi,BYTE 4
    add edi,BYTE 3
    dec ecx
    jnz .tail_loop

.done:
    pop ebp
    retn



;; 32 bit RGB 888 to 24 bit BGR 888
;;
;; Writes source bytes 2, 1, 0 of every dword (in that order) and drops the
;; fourth byte.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 3 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; EBP is pushed and restored on the unrolled path
;;
;; Register diagrams list the four bytes of a dword, high byte first; the
;; digit is the index of the pixel inside the current group of four.

_ConvertX86p32_24BGR888:

    ; 32 pixels or fewer: use the byte-wise loop
    cmp ecx,BYTE 32
    ja .align_head


.short_loop:
    mov dl,[esi]
    mov bl,[esi+1]
    mov al,[esi+2]
    mov [edi],al
    mov [edi+1],bl
    mov [edi+2],dl
    add esi,BYTE 4
    add edi,BYTE 3
    dec ecx
    jnz .short_loop
    retn

.align_head:
    ; Convert single pixels until edi is a multiple of 4, so that the dword
    ; stores in the unrolled loop are aligned.
    mov edx,edi
    and edx,BYTE 11b
    jz .unrolled
    mov dl,[esi]
    mov bl,[esi+1]
    mov al,[esi+2]
    mov [edi],al
    mov [edi+1],bl
    mov [edi+2],dl
    add esi,BYTE 4
    add edi,BYTE 3
    dec ecx
    jmp SHORT .align_head

.unrolled:
    ; four source pixels (16 bytes) become three dwords (12 bytes) per pass
    push ebp
    mov ebp,ecx
    shr ebp,2

    ; ecx is reused as a data register below; keep the count for the tail
    push ecx

.block_loop:
        mov eax,[esi]                   ; pixel 0                eax = [A0][R0][G0][B0]
        mov ebx,[esi+4]                 ; pixel 1                ebx = [A1][R1][G1][B1]

        bswap eax                       ;                        eax = [B0][G0][R0][A0]

        bswap ebx                       ;                        ebx = [B1][G1][R1][A1]

        mov al,[esi+4+2]                ;                        eax = [B0][G0][R0][R1]
        mov bh,[esi+4+4+1]              ;                        ebx = [B1][G1][G2][A1]

        ror eax,8                       ;                        eax = [R1][B0][G0][R0] (done)
        mov bl,[esi+4+4+2]              ;                        ebx = [B1][G1][G2][R2]

        ror ebx,16                      ;                        ebx = [G2][R2][B1][G1] (done)
        mov [edi],eax

        mov [edi+4],ebx
        mov ecx,[esi+12]                ; pixel 3                ecx = [A3][R3][G3][B3]

        bswap ecx                       ;                        ecx = [B3][G3][R3][A3]

        mov cl,[esi+8]                  ;                        ecx = [B3][G3][R3][B2] (done)
        add esi,BYTE 4*4

        mov [edi+8],ecx
        add edi,BYTE 3*4

        dec ebp
        jnz .block_loop

    ; 0..3 pixels are left over after the 4-pixel blocks
    pop ecx
    and ecx,BYTE 11b
    jz .done

.tail_loop:
    mov dl,[esi]
    mov bl,[esi+1]
    mov al,[esi+2]
    mov [edi],al
    mov [edi+1],bl
    mov [edi+2],dl
    add esi,BYTE 4
    add edi,BYTE 3
    dec ecx
    jnz .tail_loop

.done:
    pop ebp
    retn



;; 32 bit RGB 888 to 16 BIT RGB 565
;;
;; Output word per pixel: red in bits 11-15, green in bits 5-10, blue in
;; bits 0-4 (the top 5/6/5 bits of each source component).
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 2 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; one dword of stack on the long path

_ConvertX86p32_16RGB565:
    ; 16 pixels or fewer: use the one-pixel loop
    cmp ecx,BYTE 16
    ja .long_run

.short_loop:
    mov bl,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov ah,[esi+2]    ; red
    shr ah,3          ; red   -> 5 bits
    and al,11111100b  ; green -> top 6 bits
    shl eax,3         ; ax = red(5):green(6):00000
    shr bl,3          ; blue  -> 5 bits
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx
    jnz .short_loop
    retn


.long_run:
    ; If edi is not a multiple of 4, convert one pixel first so the dword
    ; stores of the pair loop are aligned.
    ; NOTE(review): this only aligns edi if it is already 2-byte aligned --
    ; confirm against the callers.
    mov ebx,edi
    and ebx,BYTE 11b
    jz .pairs

    mov bl,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov ah,[esi+2]    ; red
    shr ah,3
    and al,11111100b
    shl eax,3
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx

.pairs:
    ; keep the count: its low bit says whether a single pixel is left over
    push ecx

    ; two pixels per iteration
    shr ecx,1

    ; point both pointers at the end of the paired region ...
    lea esi,[esi+ecx*8]
    lea edi,[edi+ecx*4]

    ; ... and index them with a negative counter that runs up to zero
    neg ecx
    jmp SHORT .pair_loop

.store_pair:
    ; ecx was already incremented, so -4 addresses the pair just converted
    mov [edi+ecx*4-4],eax
.pair_loop:
    mov eax,[esi+ecx*8]                 ; first pixel of the pair

        shr ah,2                        ; green0 -> 6 bits
        mov ebx,[esi+ecx*8+4]           ; second pixel of the pair

        shr eax,3                       ; bits 0-4 blue0, bits 5-10 green0
        mov edx,[esi+ecx*8+4]           ; second pixel again (red1 in bits 16-23)

        shr bh,2                        ; green1 -> 6 bits
        mov dl,[esi+ecx*8+2]            ; dl = red0

        shl ebx,13                      ; bits 16-20 blue1, bits 21-26 green1
        and eax,000007FFh

        shl edx,8                       ; red0 -> bits 8-15, red1 -> bits 24-31
        and ebx,07FF0000h

        and edx,0F800F800h              ; keep the top 5 bits of each red
        add eax,ebx

        add eax,edx                     ; eax = two packed 565 pixels
        inc ecx

        jnz .store_pair

    ; store the last pair (ecx is 0 here)
    mov [edi+ecx*4-4],eax

    ; odd count: one pixel is still left
    pop ecx
    test cl,1
    jz .done

    mov bl,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov ah,[esi+2]    ; red
    shr ah,3
    and al,11111100b
    shl eax,3
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2

.done:
    retn



;; 32 bit RGB 888 to 16 BIT BGR 565
;;
;; Output word per pixel: blue in bits 11-15, green in bits 5-10, red in
;; bits 0-4 (the top 5/6/5 bits of each source component).
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 2 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; one dword of stack on the long path

_ConvertX86p32_16BGR565:

    ; 16 pixels or fewer: use the one-pixel loop
    cmp ecx,BYTE 16
    ja .long_run

.short_loop:
    mov ah,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov bl,[esi+2]    ; red
    shr ah,3          ; blue  -> 5 bits
    and al,11111100b  ; green -> top 6 bits
    shl eax,3         ; ax = blue(5):green(6):00000
    shr bl,3          ; red   -> 5 bits
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx
    jnz .short_loop
    retn

.long_run:
    ; If edi is not a multiple of 4, convert one pixel first so the dword
    ; stores of the pair loop are aligned.
    ; NOTE(review): this only aligns edi if it is already 2-byte aligned --
    ; confirm against the callers.
    mov ebx,edi
    and ebx,BYTE 11b
    jz .pairs
    mov ah,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov bl,[esi+2]    ; red
    shr ah,3
    and al,11111100b
    shl eax,3
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx

.pairs:
    ; keep the count: its low bit says whether a single pixel is left over
    push ecx

    ; two pixels per iteration
    shr ecx,1

    ; point both pointers at the end of the paired region ...
    lea esi,[esi+ecx*8]
    lea edi,[edi+ecx*4]

    ; ... and index them with a negative counter that runs up to zero
    neg ecx
    jmp SHORT .pair_loop

.store_pair:
    ; ecx was already incremented, so -4 addresses the pair just converted
    mov [edi+ecx*4-4],eax
.pair_loop:
    mov edx,[esi+ecx*8+4]               ; second pixel (red1 in bits 16-23)

        mov bh,[esi+ecx*8+4]            ; bh = blue1
        mov ah,[esi+ecx*8]              ; ah = blue0

        shr bh,3                        ; blue1 -> 5 bits
        mov al,[esi+ecx*8+1]            ; al = green0

        shr ah,3                        ; blue0 -> 5 bits
        mov bl,[esi+ecx*8+5]            ; bl = green1

        shl eax,3                       ; blue0 -> bits 11-15, green0 -> bits 3-10
        mov dl,[esi+ecx*8+2]            ; dl = red0

        shl ebx,19                      ; blue1 -> bits 27-31, green1 -> bits 19-26
        and eax,0000FFE0h               ; keep blue0(5):green0(6)

        shr edx,3                       ; red0 -> bits 0-4, red1 -> bits 16-20
        and ebx,0FFE00000h              ; keep blue1(5):green1(6)

        and edx,001F001Fh               ; keep 5 bits of each red
        add eax,ebx

        add eax,edx                     ; eax = two packed 565 pixels
        inc ecx

        jnz .store_pair

    ; store the last pair (ecx is 0 here)
    mov [edi+ecx*4-4],eax

    ; odd count: one pixel is still left
    pop ecx
    and ecx,BYTE 1
    jz .done
    mov ah,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov bl,[esi+2]    ; red
    shr ah,3
    and al,11111100b
    shl eax,3
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2

.done:
    retn



;; 32 BIT RGB TO 16 BIT RGB 555
;;
;; Output word per pixel: red in bits 10-14, green in bits 5-9, blue in
;; bits 0-4 (the top 5 bits of each source component); bit 15 is 0.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 2 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; one dword of stack on the long path

_ConvertX86p32_16RGB555:

    ; 16 pixels or fewer: use the one-pixel loop
    cmp ecx,BYTE 16
    ja .long_run

.short_loop:
    mov bl,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov ah,[esi+2]    ; red
    shr ah,3          ; red   -> 5 bits
    and al,11111000b  ; green -> top 5 bits
    shl eax,2         ; ax = 0:red(5):green(5):00000
    shr bl,3          ; blue  -> 5 bits
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx
    jnz .short_loop
    retn

.long_run:
    ; If edi is not a multiple of 4, convert one pixel first so the dword
    ; stores of the pair loop are aligned.
    ; NOTE(review): this only aligns edi if it is already 2-byte aligned --
    ; confirm against the callers.
    mov ebx,edi
    and ebx,BYTE 11b
    jz .pairs
    mov bl,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov ah,[esi+2]    ; red
    shr ah,3
    and al,11111000b
    shl eax,2
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx

.pairs:
    ; keep the count: its low bit says whether a single pixel is left over
    push ecx

    ; two pixels per iteration
    shr ecx,1

    ; point both pointers at the end of the paired region ...
    lea esi,[esi+ecx*8]
    lea edi,[edi+ecx*4]

    ; ... and index them with a negative counter that runs up to zero
    neg ecx
    jmp SHORT .pair_loop

.store_pair:
    ; ecx was already incremented, so -4 addresses the pair just converted
    mov [edi+ecx*4-4],eax
.pair_loop:
    mov eax,[esi+ecx*8]                 ; first pixel of the pair

        shr ah,3                        ; green0 -> 5 bits
        mov ebx,[esi+ecx*8+4]           ; second pixel of the pair

        shr eax,3                       ; bits 0-4 blue0, bits 5-9 green0
        mov edx,[esi+ecx*8+4]           ; second pixel again (red1 in bits 16-23)

        shr bh,3                        ; green1 -> 5 bits
        mov dl,[esi+ecx*8+2]            ; dl = red0

        shl ebx,13                      ; bits 16-20 blue1, bits 21-25 green1
        and eax,000007FFh

        shl edx,7                       ; red0 -> bits 7-14, red1 -> bits 23-30
        and ebx,07FF0000h

        and edx,07C007C00h              ; keep the top 5 bits of each red
        add eax,ebx

        add eax,edx                     ; eax = two packed 555 pixels
        inc ecx

        jnz .store_pair

    ; store the last pair (ecx is 0 here)
    mov [edi+ecx*4-4],eax

    ; odd count: one pixel is still left
    pop ecx
    and ecx,BYTE 1
    jz .done
    mov bl,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov ah,[esi+2]    ; red
    shr ah,3
    and al,11111000b
    shl eax,2
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2

.done:
    retn



;; 32 BIT RGB TO 16 BIT BGR 555
;;
;; Output word per pixel: blue in bits 10-14, green in bits 5-9, red in
;; bits 0-4 (the top 5 bits of each source component); bit 15 is 0.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels (must not be 0)
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 2 bytes per pixel
;; Uses: EAX, EBX, ECX, EDX; one dword of stack on the long path

_ConvertX86p32_16BGR555:

    ; 16 pixels or fewer: use the one-pixel loop
    cmp ecx,BYTE 16
    ja .long_run


.short_loop:
    mov ah,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov bl,[esi+2]    ; red
    shr ah,3          ; blue  -> 5 bits
    and al,11111000b  ; green -> top 5 bits
    shl eax,2         ; ax = 0:blue(5):green(5):00000
    shr bl,3          ; red   -> 5 bits
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx
    jnz .short_loop
    retn

.long_run:
    ; If edi is not a multiple of 4, convert one pixel first so the dword
    ; stores of the pair loop are aligned.
    ; NOTE(review): this only aligns edi if it is already 2-byte aligned --
    ; confirm against the callers.
    mov ebx,edi
    and ebx,BYTE 11b
    jz .pairs
    mov ah,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov bl,[esi+2]    ; red
    shr ah,3
    and al,11111000b
    shl eax,2
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2
    dec ecx

.pairs:
    ; keep the count: its low bit says whether a single pixel is left over
    push ecx

    ; two pixels per iteration
    shr ecx,1

    ; point both pointers at the end of the paired region ...
    lea esi,[esi+ecx*8]
    lea edi,[edi+ecx*4]

    ; ... and index them with a negative counter that runs up to zero
    neg ecx
    jmp SHORT .pair_loop

.store_pair:
    ; ecx was already incremented, so -4 addresses the pair just converted
    mov [edi+ecx*4-4],eax
.pair_loop:
    mov edx,[esi+ecx*8+4]               ; second pixel (red1 in bits 16-23)

        mov bh,[esi+ecx*8+4]            ; bh = blue1
        mov ah,[esi+ecx*8]              ; ah = blue0

        shr bh,3                        ; blue1 -> 5 bits
        mov al,[esi+ecx*8+1]            ; al = green0

        shr ah,3                        ; blue0 -> 5 bits
        mov bl,[esi+ecx*8+5]            ; bl = green1

        shl eax,2                       ; blue0 -> bits 10-14, green0 -> bits 2-9
        mov dl,[esi+ecx*8+2]            ; dl = red0

        shl ebx,18                      ; blue1 -> bits 26-30, green1 -> bits 18-25
        and eax,00007FE0h               ; keep blue0(5):green0(5)

        shr edx,3                       ; red0 -> bits 0-4, red1 -> bits 16-20
        and ebx,07FE00000h              ; keep blue1(5):green1(5)

        and edx,001F001Fh               ; keep 5 bits of each red
        add eax,ebx

        add eax,edx                     ; eax = two packed 555 pixels
        inc ecx

        jnz .store_pair

    ; store the last pair (ecx is 0 here)
    mov [edi+ecx*4-4],eax

    ; odd count: one pixel is still left
    pop ecx
    and ecx,BYTE 1
    jz .done
    mov ah,[esi+0]    ; blue
    mov al,[esi+1]    ; green
    mov bl,[esi+2]    ; red
    shr ah,3
    and al,11111000b
    shl eax,2
    shr bl,3
    add al,bl
    mov [edi+0],al
    mov [edi+1],ah
    add esi,BYTE 4
    add edi,BYTE 2

.done:
    retn




;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (dword) and then, if they exist,
;; the up to three trailing pixels one byte at a time.
;;
;; Output byte per pixel: top 3 bits of red in bits 5-7, top 3 bits of green
;; in bits 2-4, top 2 bits of blue in bits 0-1.
;;
;; In:   ESI = source, EDI = dest, ECX = number of pixels
;; Out:  ESI advanced by 4 bytes per pixel, EDI by 1 byte per pixel
;; Uses: EAX, EBX, ECX, EDX; one dword of stack
_ConvertX86p32_8RGB332:

    ; keep the count: its low two bits give the number of trailing pixels
    push ecx

    shr ecx,2                   ; ecx = number of 4-pixel groups
    jz NEAR .tail               ; fewer than four pixels: tail only

.quad_loop:
    mov eax,[esi]               ; pixel 0
    mov edx,[esi+4]             ; pixel 1

    shr dl,6                    ; blue1 -> 2 bits
    mov ebx,eax

    shr al,6                    ; blue0 -> 2 bits
    and ah,0e0h                 ; green0 top 3 bits

    shr ebx,16                  ; bl = red0
    and dh,0e0h                 ; green1 top 3 bits

    shr ah,3                    ; green0 -> bits 2-4
    and bl,0e0h                 ; red0 top 3 bits (bits 5-7)

    shr dh,3                    ; green1 -> bits 2-4

    or al,bl

    mov ebx,edx
    or al,ah                    ; al = converted pixel 0

    shr ebx,16                  ; bl = red1
    or dl,dh

    and bl,0e0h

    or dl,bl                    ; dl = converted pixel 1

    mov ah,dl                   ; ax = [pixel 1][pixel 0]



    mov ebx,[esi+8]             ; pixel 2

    mov edx,ebx
    and bh,0e0h                 ; green2 top 3 bits

    shr bl,6                    ; blue2 -> 2 bits
    and edx,0e00000h            ; red2 top 3 bits, still in bits 21-23

    shr edx,16                  ; dl = red2 bits 5-7

    shr bh,3                    ; green2 -> bits 2-4

    ror eax,16                  ; park pixels 0/1 in the high word
    or bl,dl

    mov edx,[esi+12]            ; pixel 3
    or bl,bh                    ; bl = converted pixel 2

    mov al,bl

    mov ebx,edx
    and dh,0e0h                 ; green3 top 3 bits

    shr dl,6                    ; blue3 -> 2 bits
    and ebx,0e00000h            ; red3 top 3 bits, still in bits 21-23

    shr dh,3                    ; green3 -> bits 2-4
    mov ah,dl

    shr ebx,16                  ; bl = red3 bits 5-7
    or ah,dh

    or ah,bl                    ; ah = converted pixel 3

    rol eax,16                  ; eax = [pixel 3][pixel 2][pixel 1][pixel 0]
    add esi,BYTE 16

    mov [edi],eax
    add edi,BYTE 4

    dec ecx
    jnz NEAR .quad_loop         ; loop body is too long for a short jump

.tail:
    pop ecx
    and ecx,BYTE 3              ; number of pixels still to convert

    jz .done                    ; nothing left

.tail_loop:
    mov eax,[esi]               ; single pixel conversion for trailing pixels

        mov ebx,eax

        shr al,6                ; blue  -> 2 bits
        and ah,0e0h             ; green top 3 bits

        shr ebx,16              ; bl = red

        shr ah,3                ; green -> bits 2-4
        and bl,0e0h             ; red top 3 bits (bits 5-7)

        or al,ah
        or al,bl

        mov [edi],al

        inc edi
        add esi,BYTE 4

    dec ecx
    jnz .tail_loop

.done:
    retn

; For ELF output only, emit an empty .note.GNU-stack section flagged noexec.
; This is the conventional marker that the object does not need an
; executable stack.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
