/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Do not put spaces between the parentheses and the arguments of a
 * macro invocation, especially with macros like CONST, LLBL that expand to
 * CONCAT(...).  Putting spaces in there will break the build on some
 * platforms.
 */

#include "assyntax.h"
/*
 * Defined ahead of m_vector_asm.h; presumably that header uses it to lay
 * out the V4F_* field offsets for 4-byte pointers -- confirm against the
 * header.
 */
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * Raw IEEE-754 single-precision bit patterns (1065353216 == 0x3F800000,
 * i.e. 1.0f).  Neither constant is referenced by the routines in this file.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/* The four 32-bit components of the source point currently addressed by ESI. */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)

/* The four 32-bit components of the destination point addressed by EDI. */
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)

/*
 * The sixteen 32-bit matrix elements addressed by EDX; MATn is the element
 * at byte offset 4*n.  In the routines of this file MAT0/4/8/12 contribute
 * to DST0, MAT1/5/9/13 to DST1, MAT2/6/10/14 to DST2 and MAT3/7/11/15 to
 * DST3.
 */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points4_general
 *
 * Transform 4-component points by a full 4x4 matrix using x87 arithmetic.
 * For each source point (x, y, z, w) = (SRC0..SRC3) the loop stores
 *
 *	DST0 = x*MAT0 + y*MAT4 + z*MAT8  + w*MAT12
 *	DST1 = x*MAT1 + y*MAT5 + z*MAT9  + w*MAT13
 *	DST2 = x*MAT2 + y*MAT6 + z*MAT10 + w*MAT14
 *	DST3 = x*MAT3 + y*MAT7 + z*MAT11 + w*MAT15
 *
 * In:    stack arguments read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *        (from xform_args.h; presumably addressed relative to
 *        FRAME_OFFSET, which equals the bytes pushed by the prologue --
 *        confirm against that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4, and one 16-byte point per source point.  A
 *        source count of zero returns without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EDX = matrix.
 * Clobb: EAX, ECX, EDX, flags.  ESI and EDI are saved and restored.
 * x87:   the loop keeps up to eight values on the register stack, so the
 *        stack must be empty on entry; it is empty again on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_general)
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME(_mesa_x86_transform_points4_general):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

	/* F4..F7 = x * column terms for DST0..DST3. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* F0..F3 = y terms, then folded into F4..F7. */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* z terms. */
	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT10 )
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT11 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* w terms. */
	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT14 )
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT15 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* Store F4..F7 to DST0..DST3, emptying the x87 stack. */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FSTP_S( DST0 )		/* F6 F5 F7 */
	FXCH( ST(1) )			/* F5 F6 F7 */
	FSTP_S( DST1 )		/* F6 F7 */
	FSTP_S( DST2 )		/* F7 */
	FSTP_S( DST3 )		/* */

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transform 4-component points by a matrix of which only the elements
 * MAT0, MAT5, MAT8, MAT9, MAT10 and MAT14 are read.  For each source point
 * (x, y, z, w) = (SRC0..SRC3) the loop stores
 *
 *	DST0 = x*MAT0  + z*MAT8
 *	DST1 = y*MAT5  + z*MAT9
 *	DST2 = z*MAT10 + w*MAT14
 *	DST3 = -z	(sign bit of the raw 32-bit word flipped, no FPU op)
 *
 * In:    stack arguments read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *        (from xform_args.h; presumably addressed relative to
 *        FRAME_OFFSET, which equals the bytes pushed by the prologue --
 *        confirm against that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4, and one 16-byte point per source point.  A
 *        source count of zero returns without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EDX = matrix;
 *        EBX = scratch for the negated z word.
 * Clobb: EAX, ECX, EDX, flags.  ESI, EDI and EBX are saved and restored.
 * x87:   at most five stack slots are used; the stack depth on exit equals
 *        the depth on entry.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_perspective)
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME(_mesa_x86_transform_points4_perspective):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F0 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
	FMUL_S( MAT14 )

	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* Fetch z before any store so the negation sees the source value. */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )/* change sign */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )		/* F5 F6 */
	FSTP_S( DST1 )		/* F6 */
	FSTP_S( DST2 )		/* */
	MOV_L( EBX, DST3 )

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



/*
 * _mesa_x86_transform_points4_3d
 *
 * Transform 4-component points by a matrix whose fourth output row is not
 * read (MAT3, MAT7, MAT11 and MAT15 are never touched).  For each source
 * point (x, y, z, w) = (SRC0..SRC3) the loop stores
 *
 *	DST0 = x*MAT0 + y*MAT4 + z*MAT8  + w*MAT12
 *	DST1 = x*MAT1 + y*MAT5 + z*MAT9  + w*MAT13
 *	DST2 = x*MAT2 + y*MAT6 + z*MAT10 + w*MAT14
 *	DST3 = w	(raw 32-bit copy through EBX)
 *
 * In:    stack arguments read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *        (from xform_args.h; presumably addressed relative to
 *        FRAME_OFFSET, which equals the bytes pushed by the prologue --
 *        confirm against that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4, and one 16-byte point per source point.  A
 *        source count of zero returns without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EDX = matrix;
 *        EBX = scratch for the w word.
 * Clobb: EAX, ECX, EDX, flags.  ESI, EDI and EBX are saved and restored.
 * x87:   at most six stack slots are used; the stack depth on exit equals
 *        the depth on entry.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d)
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME(_mesa_x86_transform_points4_3d):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

	/* F4..F6 = x terms for DST0..DST2. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* y terms, folded into F4..F6. */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* z terms. */
	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* w terms. */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* Fetch w before any store; it is passed through unchanged. */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )		/* F5 F6 */
	FSTP_S( DST1 )		/* F6 */
	FSTP_S( DST2 )		/* */
	MOV_L( EBX, DST3 )

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transform 4-component points by a matrix of which only the elements
 * MAT0, MAT5, MAT10, MAT12, MAT13 and MAT14 are read.  For each source
 * point (x, y, z, w) = (SRC0..SRC3) the loop stores
 *
 *	DST0 = x*MAT0  + w*MAT12
 *	DST1 = y*MAT5  + w*MAT13
 *	DST2 = z*MAT10 + w*MAT14
 *	DST3 = w	(raw 32-bit copy through EBX)
 *
 * In:    stack arguments read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *        (from xform_args.h; presumably addressed relative to
 *        FRAME_OFFSET, which equals the bytes pushed by the prologue --
 *        confirm against that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4, and one 16-byte point per source point.  A
 *        source count of zero returns without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EDX = matrix;
 *        EBX = scratch for the w word.
 * Clobb: EAX, ECX, EDX, flags.  ESI, EDI and EBX are saved and restored.
 * x87:   at most six stack slots are used; the stack depth on exit equals
 *        the depth on entry.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

	/* Per-axis scale terms. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F6 F5 F4 */
	FMUL_S( MAT10 )

	/* w terms, folded into F4..F6. */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* Fetch w before any store; it is passed through unchanged. */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )		/* F5 F6 */
	FSTP_S( DST1 )		/* F6 */
	FSTP_S( DST2 )		/* */
	MOV_L( EBX, DST3 )

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



/*
 * _mesa_x86_transform_points4_2d
 *
 * Transform 4-component points by a matrix of which only the elements
 * MAT0, MAT1, MAT4, MAT5, MAT12 and MAT13 are read.  For each source point
 * (x, y, z, w) = (SRC0..SRC3) the loop stores
 *
 *	DST0 = x*MAT0 + y*MAT4 + w*MAT12
 *	DST1 = x*MAT1 + y*MAT5 + w*MAT13
 *	DST2 = z	(raw 32-bit copy through EBX)
 *	DST3 = w	(raw 32-bit copy through EBP)
 *
 * In:    stack arguments read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *        (from xform_args.h; presumably addressed relative to
 *        FRAME_OFFSET, which equals the bytes pushed by the prologue --
 *        confirm against that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4, and one 16-byte point per source point.  A
 *        source count of zero returns without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EDX = matrix;
 *        EBX, EBP = scratch for the z and w words.
 * Clobb: EAX, ECX, EDX, flags.  ESI, EDI, EBX and EBP are saved and
 *        restored.
 * x87:   at most four stack slots are used; the stack depth on exit equals
 *        the depth on entry.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d)
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME(_mesa_x86_transform_points4_2d):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

	/* F4, F5 = x terms for DST0, DST1. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* y terms, folded into F4, F5. */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* w terms. */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* Fetch z and w before any store; both are passed through unchanged. */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Transform 4-component points by a matrix of which only the elements
 * MAT0, MAT5, MAT12 and MAT13 are read.  For each source point
 * (x, y, z, w) = (SRC0..SRC3) the loop stores
 *
 *	DST0 = x*MAT0 + w*MAT12
 *	DST1 = y*MAT5 + w*MAT13
 *	DST2 = z	(raw 32-bit copy through EBX)
 *	DST3 = w	(raw 32-bit copy through EBP)
 *
 * In:    stack arguments read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *        (from xform_args.h; presumably addressed relative to
 *        FRAME_OFFSET, which equals the bytes pushed by the prologue --
 *        confirm against that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4, and one 16-byte point per source point.  A
 *        source count of zero returns without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EDX = matrix;
 *        EBX, EBP = scratch for the z and w words.
 * Clobb: EAX, ECX, EDX, flags.  ESI, EDI, EBX and EBP are saved and
 *        restored.
 * x87:   at most four stack slots are used; the stack depth on exit equals
 *        the depth on entry.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d_no_rot)
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME(_mesa_x86_transform_points4_2d_no_rot):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

	/* Per-axis scale terms. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	/* w terms, folded into F4, F5. */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* Fetch z and w before any store; both are passed through unchanged. */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



/*
 * _mesa_x86_transform_points4_identity
 *
 * Copy 4-component points from the source vector to the destination vector
 * without any arithmetic: each point is moved as four raw 32-bit words
 * (DSTn = SRCn).  The matrix argument is never read.
 *
 * In:    stack arguments read through ARG_SOURCE and ARG_DEST (from
 *        xform_args.h; presumably addressed relative to FRAME_OFFSET,
 *        which equals the bytes pushed by the prologue -- confirm against
 *        that header).
 * Out:   the destination vector gets flags |= VEC_SIZE_4, count = source
 *        count, size = 4.  The point data is copied unless the source and
 *        destination data pointers are equal, in which case only the
 *        header fields are updated.  A source count of zero returns
 *        without touching the destination.
 * Regs:  ESI = current source point, advanced by the source stride (EAX);
 *        EDI = current destination point, advanced by 16 bytes;
 *        ECX = end of the destination data; EBX, EDX = copy scratch.
 * Clobb: EAX, ECX, EDX, flags.  ESI, EDI and EBX are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_identity)
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME(_mesa_x86_transform_points4_identity):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	/* ARG_MATRIX is deliberately not loaded: EDX serves as copy scratch. */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to do for an empty source vector. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 = number of destination bytes to produce. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	/* ECX = destination start + byte count = loop end pointer. */
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Source and destination share storage: the data is already in place. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EDX )

	MOV_L( EBX, DST2 )
	MOV_L( EDX, DST3 )

	/* Next point: fixed 16-byte destination step, caller-defined source step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

/*
 * On ELF/Linux, emit an empty .note.GNU-stack section: the GNU toolchain
 * reads it as "this object does not need an executable stack".
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
