1 /* mmx.c
2
3 MultiMedia eXtensions GCC interface library for IA32.
4
5 To use this library, simply include this header file
6 and compile with GCC. You MUST have inlining enabled
7 in order for mmx_ok() to work; this can be done by
8 simply using -O on the GCC command line.
9
10 Compiling with -DMMX_TRACE will cause detailed trace
11 output to be sent to stderr for each mmx operation.
12 This adds lots of code, and obviously slows execution to
13 a crawl, but can be very useful for debugging.
14
15 THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
17 LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
18 AND FITNESS FOR ANY PARTICULAR PURPOSE.
19
20 1997-99 by H. Dietz and R. Fisher
21
22 Notes:
23 It appears that the latest gas has the pand problem fixed, therefore
24 I'll undefine BROKEN_PAND by default.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include "goom_config.h"
32
33 #ifdef HAVE_MMX
34
35 #define BUFFPOINTNB 16
36 #define BUFFPOINTMASK 0xffff
37 #define BUFFINCR 0xff
38
39 #include "mmx.h"
40 #include "goom_graphic.h"
41
42 #define sqrtperte 16
// to compute a % sqrtperte, use: a & PERTEMASK
44 #define PERTEMASK 0xf
// to compute a / sqrtperte, use: a >> PERTEDEC
46 #define PERTEDEC 4
47
/*
 * Returns 1 when bit 0 of the feature mask reported by mm_support() is
 * set, 0 otherwise.  (Bit 0 is presumably the "MMX available" flag --
 * confirm against mmx.h.)
 */
int
mmx_supported (void)
{
  const int features = mm_support ();

  return features & 0x1;
}
53
54 void
zoom_filter_mmx(int prevX,int prevY,Pixel * expix1,Pixel * expix2,int * brutS,int * brutD,int buffratio,int precalCoef[16][16])55 zoom_filter_mmx (int prevX, int prevY,
56 Pixel * expix1, Pixel * expix2,
57 int *brutS, int *brutD, int buffratio, int precalCoef[16][16])
58 {
59 unsigned int ax = (prevX - 1) << PERTEDEC, ay = (prevY - 1) << PERTEDEC;
60
61 int bufsize = prevX * prevY;
62 int loop;
63
64 __asm__ __volatile__ ("pxor %mm7,%mm7");
65
66 for (loop = 0; loop < bufsize; loop++) {
67 /* int couleur; */
68 int px, py;
69 int pos;
70 int coeffs;
71
72 int myPos = loop << 1, myPos2 = myPos + 1;
73 int brutSmypos = brutS[myPos];
74
75 px = brutSmypos + (((brutD[myPos] -
76 brutSmypos) * buffratio) >> BUFFPOINTNB);
77 brutSmypos = brutS[myPos2];
78 py = brutSmypos + (((brutD[myPos2] -
79 brutSmypos) * buffratio) >> BUFFPOINTNB);
80
81 if ((py >= ay) || (px >= ax)) {
82 pos = coeffs = 0;
83 } else {
84 pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
85 // coef en modulo 15
86 coeffs = precalCoef[px & PERTEMASK][py & PERTEMASK];
87 }
88
89 __asm__ __volatile__ ("movd %2, %%mm6 \n\t"
90 /* recuperation des deux premiers pixels dans mm0 et mm1 */
91 "movq (%3,%1,4), %%mm0 \n\t" /* b1-v1-r1-a1-b2-v2-r2-a2 */
92 "movq %%mm0, %%mm1 \n\t" /* b1-v1-r1-a1-b2-v2-r2-a2 */
93 /* depackage du premier pixel */
94 "punpcklbw %%mm7, %%mm0 \n\t" /* 00-b2-00-v2-00-r2-00-a2 */
95 "movq %%mm6, %%mm5 \n\t" /* ??-??-??-??-c4-c3-c2-c1 */
96 /* depackage du 2ieme pixel */
97 "punpckhbw %%mm7, %%mm1 \n\t" /* 00-b1-00-v1-00-r1-00-a1 */
98 /* extraction des coefficients... */
99 "punpcklbw %%mm5, %%mm6 \n\t" /* c4-c4-c3-c3-c2-c2-c1-c1 */
100 "movq %%mm6, %%mm4 \n\t" /* c4-c4-c3-c3-c2-c2-c1-c1 */
101 "movq %%mm6, %%mm5 \n\t" /* c4-c4-c3-c3-c2-c2-c1-c1 */
102 "punpcklbw %%mm5, %%mm6 \n\t" /* c2-c2-c2-c2-c1-c1-c1-c1 */
103 "punpckhbw %%mm5, %%mm4 \n\t" /* c4-c4-c4-c4-c3-c3-c3-c3 */
104 "movq %%mm6, %%mm3 \n\t" /* c2-c2-c2-c2-c1-c1-c1-c1 */
105 "punpcklbw %%mm7, %%mm6 \n\t" /* 00-c1-00-c1-00-c1-00-c1 */
106 "punpckhbw %%mm7, %%mm3 \n\t" /* 00-c2-00-c2-00-c2-00-c2 */
107 /* multiplication des pixels par les coefficients */
108 "pmullw %%mm6, %%mm0 \n\t" /* c1*b2-c1*v2-c1*r2-c1*a2 */
109 "pmullw %%mm3, %%mm1 \n\t" /* c2*b1-c2*v1-c2*r1-c2*a1 */
110 "paddw %%mm1, %%mm0 \n\t"
111 /* ...extraction des 2 derniers coefficients */
112 "movq %%mm4, %%mm5 \n\t" /* c4-c4-c4-c4-c3-c3-c3-c3 */
113 "punpcklbw %%mm7, %%mm4 \n\t" /* 00-c3-00-c3-00-c3-00-c3 */
114 "punpckhbw %%mm7, %%mm5 \n\t" /* 00-c4-00-c4-00-c4-00-c4 */
115 /* ajouter la longueur de ligne a esi */
116 "addl 8(%%ebp),%1 \n\t"
117 /* recuperation des 2 derniers pixels */
118 "movq (%3,%1,4), %%mm1 \n\t" "movq %%mm1, %%mm2 \n\t"
119 /* depackage des pixels */
120 "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t"
121 /* multiplication pas les coeffs */
122 "pmullw %%mm4, %%mm1 \n\t" "pmullw %%mm5, %%mm2 \n\t"
123 /* ajout des valeurs obtenues ? la valeur finale */
124 "paddw %%mm1, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t"
125 /* division par 256 = 16+16+16+16, puis repackage du pixel final */
126 "psrlw $8, %%mm0 \n\t"
127 "packuswb %%mm7, %%mm0 \n\t" "movd %%mm0,%0 \n\t":"=g" (expix2[loop])
128 :"r" (pos), "r" (coeffs), "r" (expix1)
129
130 );
131
132 emms ();
133 }
134 }
135
/*
 * DRAWMETHOD_PLUS_MMX(_out, _backbuf, _col):
 * loads _backbuf into mm0, adds _col to it with paddusb, and stores mm0
 * into _out.  (Going by the mnemonic, paddusb is a per-byte unsigned
 * saturating add -- see mmx.h for the exact macro definitions.)
 *
 * Wrapped in do { } while (0) so that "DRAWMETHOD_PLUS_MMX(...);" is a
 * single statement and can be used safely in an unbraced if/else.
 * The caller is responsible for issuing emms() afterwards.
 */
#define DRAWMETHOD_PLUS_MMX(_out,_backbuf,_col) \
  do { \
    movd_m2r(_backbuf, mm0); \
    paddusb_m2r(_col, mm0); \
    movd_r2m(mm0, _out); \
  } while (0)

/* Adds `col` in place to the pixel that `p` points to; both names must
 * be in scope at the point of use. */
#define DRAWMETHOD DRAWMETHOD_PLUS_MMX(*p,*p,col)
144
145 void
draw_line_mmx(Pixel * data,int x1,int y1,int x2,int y2,int col,int screenx,int screeny)146 draw_line_mmx (Pixel * data, int x1, int y1, int x2, int y2, int col,
147 int screenx, int screeny)
148 {
149 int x, y, dx, dy, yy, xx;
150 Pixel *p;
151
152 if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny)
153 || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
154 goto end_of_line;
155
156 dx = x2 - x1;
157 dy = y2 - y1;
158 if (x1 >= x2) {
159 int tmp;
160
161 tmp = x1;
162 x1 = x2;
163 x2 = tmp;
164 tmp = y1;
165 y1 = y2;
166 y2 = tmp;
167 dx = x2 - x1;
168 dy = y2 - y1;
169 }
170
171 /* vertical line */
172 if (dx == 0) {
173 if (y1 < y2) {
174 p = &(data[(screenx * y1) + x1]);
175 for (y = y1; y <= y2; y++) {
176 DRAWMETHOD;
177 p += screenx;
178 }
179 } else {
180 p = &(data[(screenx * y2) + x1]);
181 for (y = y2; y <= y1; y++) {
182 DRAWMETHOD;
183 p += screenx;
184 }
185 }
186 goto end_of_line;
187 }
188 /* horizontal line */
189 if (dy == 0) {
190 if (x1 < x2) {
191 p = &(data[(screenx * y1) + x1]);
192 for (x = x1; x <= x2; x++) {
193 DRAWMETHOD;
194 p++;
195 }
196 goto end_of_line;
197 } else {
198 p = &(data[(screenx * y1) + x2]);
199 for (x = x2; x <= x1; x++) {
200 DRAWMETHOD;
201 p++;
202 }
203 goto end_of_line;
204 }
205 }
206 /* 1 */
207 /* \ */
208 /* \ */
209 /* 2 */
210 if (y2 > y1) {
211 /* steep */
212 if (dy > dx) {
213 dx = ((dx << 16) / dy);
214 x = x1 << 16;
215 for (y = y1; y <= y2; y++) {
216 xx = x >> 16;
217 p = &(data[(screenx * y) + xx]);
218 DRAWMETHOD;
219 if (xx < (screenx - 1)) {
220 p++;
221 /* DRAWMETHOD; */
222 }
223 x += dx;
224 }
225 goto end_of_line;
226 }
227 /* shallow */
228 else {
229 dy = ((dy << 16) / dx);
230 y = y1 << 16;
231 for (x = x1; x <= x2; x++) {
232 yy = y >> 16;
233 p = &(data[(screenx * yy) + x]);
234 DRAWMETHOD;
235 if (yy < (screeny - 1)) {
236 p += screeny;
237 /* DRAWMETHOD; */
238 }
239 y += dy;
240 }
241 }
242 }
243 /* 2 */
244 /* / */
245 /* / */
246 /* 1 */
247 else {
248 /* steep */
249 if (-dy > dx) {
250 dx = ((dx << 16) / -dy);
251 x = (x1 + 1) << 16;
252 for (y = y1; y >= y2; y--) {
253 xx = x >> 16;
254 p = &(data[(screenx * y) + xx]);
255 DRAWMETHOD;
256 if (xx < (screenx - 1)) {
257 p--;
258 /* DRAWMETHOD; */
259 }
260 x += dx;
261 }
262 goto end_of_line;
263 }
264 /* shallow */
265 else {
266 dy = ((dy << 16) / dx);
267 y = y1 << 16;
268 for (x = x1; x <= x2; x++) {
269 yy = y >> 16;
270 p = &(data[(screenx * yy) + x]);
271 DRAWMETHOD;
272 if (yy < (screeny - 1)) {
273 p += screeny;
274 /* DRAWMETHOD; */
275 }
276 y += dy;
277 }
278 goto end_of_line;
279 }
280 }
281 end_of_line:
282 emms ();
283 /* __asm__ __volatile__ ("emms"); */
284 }
285 #else
/*
 * Fallback used when the file is built without HAVE_MMX: the MMX code
 * paths are compiled out, so this always returns 0.
 */
int
mmx_supported (void)
{
  return 0;
}
291 #endif /* HAVE_MMX */
292