/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * 3DNow! transforms of 4-component float vertices.
 *
 * Every routine in this file has the same shape:
 *
 *   - it reads three arguments through the ARG_DEST, ARG_MATRIX and
 *     ARG_SOURCE macros (defined in xform_args.h, not in this file);
 *   - it stores 4 into the V4F_SIZE field of the destination, ORs
 *     VEC_SIZE_4 into its V4F_FLAGS byte and copies V4F_COUNT from the
 *     source to the destination;
 *   - it then processes V4F_COUNT vertices.  Each vertex is read as
 *     16 bytes (x0, x1, x2, x3) from the source V4F_START pointer, which
 *     is advanced by the source V4F_STRIDE (a byte count) per vertex;
 *     each result (r0, r1, r2, r3) is written as 16 bytes to the
 *     destination V4F_START pointer, which is advanced by 16 per vertex;
 *   - a count of zero skips the loop and goes straight to the epilogue.
 *
 * Matrix elements are 4-byte floats.  In the comments "mN" is the float at
 * byte offset 4*N from the matrix pointer and "mRC" is the float at byte
 * offset 4*(4*R + C), so m11 == m5, m22 == m10, m32 == m14 and so on.
 * Register comments are written "high dword | low dword".
 *
 * Register roles inside every loop:
 *   EAX = current source vertex     EDX = current destination vertex
 *   ECX = matrix (where used)       ESI = vertices remaining
 *   EDI = source stride in bytes
 *
 * ESI and EDI are saved and restored; EAX, ECX, EDX and MM0-MM7 are
 * clobbered.  FEMMS is executed before returning, on the empty-input path
 * as well.
 *
 * NOTE(review): the ARG_* macros are presumably ESP-relative and rely on
 * FRAME_OFFSET below matching the single PUSH_L that precedes them; that
 * is why all arguments are fetched before EDI is pushed.  Confirm against
 * xform_args.h before reordering any prologue.
 */

#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

#define FRAME_OFFSET 4


/*
 * _mesa_3dnow_transform_points4_general
 *
 * Uses all sixteen matrix elements:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
HIDDEN(_mesa_3dnow_transform_points4_general)
GLNAME( _mesa_3dnow_transform_points4_general ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_MATRIX, ESI )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L       ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( ESI, ECX )                        /* ECX = matrix */
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TPGR_2 ) )                /* nothing to transform */

    PREFETCHW   ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_1 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */

    MOVQ        ( REGIND(EAX), MM0 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM4 ) /* x3            | x2            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    PREFETCH    ( REGIND(EAX) )

    MOVQ        ( MM0, MM2 )            /* x1            | x0            */
    MOVQ        ( MM4, MM6 )            /* x3            | x2            */

    PUNPCKLDQ   ( MM0, MM0 )            /* x0            | x0            */
    PUNPCKHDQ   ( MM2, MM2 )            /* x1            | x1            */

    MOVQ        ( MM0, MM1 )            /* x0            | x0            */
    ADD_L       ( CONST(16), EDX )      /* next r                        */

    PFMUL       ( REGIND(ECX), MM0 )    /* x0*m1         | x0*m0         */
    MOVQ        ( MM2, MM3 )            /* x1            | x1            */

    PFMUL       ( REGOFF(8, ECX), MM1 ) /* x0*m3         | x0*m2         */
    PUNPCKLDQ   ( MM4, MM4 )            /* x2            | x2            */

    PFMUL       ( REGOFF(16, ECX), MM2 ) /* x1*m5        | x1*m4         */
    MOVQ        ( MM4, MM5 )            /* x2            | x2            */

    PFMUL       ( REGOFF(24, ECX), MM3 ) /* x1*m7        | x1*m6         */
    PUNPCKHDQ   ( MM6, MM6 )            /* x3            | x3            */

    PFMUL       ( REGOFF(32, ECX), MM4 ) /* x2*m9        | x2*m8         */
    MOVQ        ( MM6, MM7 )            /* x3            | x3            */

    PFMUL       ( REGOFF(40, ECX), MM5 ) /* x2*m11       | x2*m10        */
    PFADD       ( MM0, MM2 )            /* x0,x1 terms of r1 | r0        */

    PFMUL       ( REGOFF(48, ECX), MM6 ) /* x3*m13       | x3*m12        */
    PFADD       ( MM1, MM3 )            /* x0,x1 terms of r3 | r2        */

    PFMUL       ( REGOFF(56, ECX), MM7 ) /* x3*m15       | x3*m14        */
    PFADD       ( MM4, MM6 )            /* x2,x3 terms of r1 | r0        */

    PFADD       ( MM5, MM7 )            /* x2,x3 terms of r3 | r2        */
    PFADD       ( MM2, MM6 )            /* r1            | r0            */

    PFADD       ( MM3, MM7 )            /* r3            | r2            */
    MOVQ        ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 (EDX already advanced) */

    MOVQ        ( MM7, REGOFF(-8, EDX) ) /* write r2, r3                 */

    DEC_L       ( ESI )                 /* decrement vertex counter      */
    JNZ         ( LLBL( G3TPGR_1 ) )    /* cnt > 0 ? -> process next vertex */

LLBL( G3TPGR_2 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Reads only m00, m11, m20, m21, m22 and m32:
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + x3*m32
 *   r3 = -x2
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
HIDDEN(_mesa_3dnow_transform_points4_perspective)
GLNAME( _mesa_3dnow_transform_points4_perspective ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_MATRIX, ESI )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L       ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( ESI, ECX )                        /* ECX = matrix */
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TPPR_2 ) )                /* nothing to transform */

    PREFETCH    ( REGIND(EAX) )
    PREFETCHW   ( REGIND(EDX) )

    /* loop-invariant matrix elements, kept in MM0-MM2; MM7 stays zero */
    MOVD        ( REGIND(ECX), MM0 )        /*               | m00           */
    PUNPCKLDQ   ( REGOFF(20, ECX), MM0 )    /* m11           | m00           */

    MOVD        ( REGOFF(40, ECX), MM1 )    /*               | m22           */
    PUNPCKLDQ   ( REGOFF(56, ECX), MM1 )    /* m32           | m22           */

    MOVQ        ( REGOFF(32, ECX), MM2 )    /* m21           | m20           */
    PXOR        ( MM7, MM7 )                /* 0             | 0             */

ALIGNTEXT16
LLBL( G3TPPR_1 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */

    MOVQ        ( REGIND(EAX), MM4 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM5 ) /* x3            | x2            */
    MOVD        ( REGOFF(8, EAX), MM3 ) /*               | x2            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    /* 32 bytes past the next vertex; that is a whole number of vertices */
    /* ahead only when the source is tightly packed (stride 16)          */
    PREFETCH    ( REGOFF(32, EAX) )

    MOVQ        ( MM5, MM6 )            /* x3            | x2            */
    PFMUL       ( MM0, MM4 )            /* x1*m11        | x0*m00        */

    PUNPCKLDQ   ( MM5, MM5 )            /* x2            | x2            */
    ADD_L       ( CONST(16), EDX )      /* next r                        */

    PFMUL       ( MM2, MM5 )            /* x2*m21        | x2*m20        */
    PFSUBR      ( MM7, MM3 )            /* 0 - MM3:      | -x2           */

    PFMUL       ( MM1, MM6 )            /* x3*m32        | x2*m22        */
    PFADD       ( MM4, MM5 )            /* x1*m11+x2*m21 | x0*m00+x2*m20 */

    PFACC       ( MM3, MM6 )            /* -x2           | x2*m22+x3*m32 */
    MOVQ        ( MM5, REGOFF(-16, EDX) ) /* write r0, r1                */

    MOVQ        ( MM6, REGOFF(-8, EDX) ) /* write r2, r3                 */
    DEC_L       ( ESI )                 /* decrement vertex counter      */

    JNZ         ( LLBL( G3TPPR_1 ) )    /* cnt > 0 ? -> process next vertex */

LLBL( G3TPPR_2 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Does not read m3, m7, m11 or m15:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x3
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
HIDDEN(_mesa_3dnow_transform_points4_3d)
GLNAME( _mesa_3dnow_transform_points4_3d ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_MATRIX, ESI )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L       ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( ESI, ECX )                        /* ECX = matrix */
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TP3R_2 ) )                /* nothing to transform */

    /* loop-invariant third-column elements, kept in MM6 and MM7 */
    MOVD        ( REGOFF(8, ECX), MM6 )     /*               | m2            */
    PUNPCKLDQ   ( REGOFF(24, ECX), MM6 )    /* m6            | m2            */

    MOVD        ( REGOFF(40, ECX), MM7 )    /*               | m10           */
    PUNPCKLDQ   ( REGOFF(56, ECX), MM7 )    /* m14           | m10           */

ALIGNTEXT16
LLBL( G3TP3R_1 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */
    PREFETCH    ( REGOFF(32, EAX) )     /* hopefully array is tightly packed */

    MOVQ        ( REGIND(EAX), MM2 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM3 ) /* x3            | x2            */

    MOVQ        ( MM2, MM0 )            /* x1            | x0            */
    MOVQ        ( MM3, MM4 )            /* x3            | x2            */

    MOVQ        ( MM0, MM1 )            /* x1            | x0            */
    MOVQ        ( MM4, MM5 )            /* x3            | x2            */

    PUNPCKLDQ   ( MM0, MM0 )            /* x0            | x0            */
    PUNPCKHDQ   ( MM1, MM1 )            /* x1            | x1            */

    PFMUL       ( REGIND(ECX), MM0 )    /* x0*m1         | x0*m0         */
    PUNPCKLDQ   ( MM3, MM3 )            /* x2            | x2            */

    PFMUL       ( REGOFF(16, ECX), MM1 ) /* x1*m5        | x1*m4         */
    PUNPCKHDQ   ( MM4, MM4 )            /* x3            | x3            */

    PFMUL       ( MM6, MM2 )            /* x1*m6         | x0*m2         */
    PFADD       ( MM0, MM1 )            /* x0*m1+x1*m5   | x0*m0+x1*m4   */

    PFMUL       ( REGOFF(32, ECX), MM3 ) /* x2*m9        | x2*m8         */
    ADD_L       ( CONST(16), EDX )      /* next r                        */

    PFMUL       ( REGOFF(48, ECX), MM4 ) /* x3*m13       | x3*m12        */
    PFADD       ( MM1, MM3 )            /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */

    PFMUL       ( MM7, MM5 )            /* x3*m14        | x2*m10        */
    PFADD       ( MM3, MM4 )            /* r1            | r0            */

    PFACC       ( MM2, MM5 )            /* x0*m2+x1*m6   | x2*m10+x3*m14 */
    MOVD        ( REGOFF(12, EAX), MM0 ) /*              | x3            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    PFACC       ( MM0, MM5 )            /* r3 (= x3 + 0) | r2            */

    MOVQ        ( MM4, REGOFF(-16, EDX) ) /* write r0, r1                */
    MOVQ        ( MM5, REGOFF(-8, EDX) ) /* write r2, r3                 */

    DEC_L       ( ESI )                 /* decrement vertex counter      */
    JNZ         ( LLBL( G3TP3R_1 ) )    /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3R_2 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Reads only m00, m11, m22, m30, m31 and m32:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2*m22 + x3*m32
 *   r3 = x3
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_MATRIX, ESI )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L       ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( ESI, ECX )                        /* ECX = matrix */
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TP3NRR_2 ) )              /* nothing to transform */

    /* loop-invariant matrix elements, kept in MM0-MM2 */
    MOVD        ( REGIND(ECX), MM0 )        /*               | m00           */
    PUNPCKLDQ   ( REGOFF(20, ECX), MM0 )    /* m11           | m00           */

    MOVD        ( REGOFF(40, ECX), MM2 )    /*               | m22           */
    PUNPCKLDQ   ( REGOFF(56, ECX), MM2 )    /* m32           | m22           */

    MOVQ        ( REGOFF(48, ECX), MM1 )    /* m31           | m30           */

ALIGNTEXT16
LLBL( G3TP3NRR_1 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */

    MOVQ        ( REGIND(EAX), MM4 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM5 ) /* x3            | x2            */
    MOVD        ( REGOFF(12, EAX), MM7 ) /*              | x3            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    /* 32 bytes past the next vertex; that is a whole number of vertices */
    /* ahead only when the source is tightly packed (stride 16)          */
    PREFETCH    ( REGOFF(32, EAX) )

    MOVQ        ( MM5, MM6 )            /* x3            | x2            */
    PFMUL       ( MM0, MM4 )            /* x1*m11        | x0*m00        */

    PUNPCKHDQ   ( MM6, MM6 )            /* x3            | x3            */
    PFMUL       ( MM2, MM5 )            /* x3*m32        | x2*m22        */

    PFMUL       ( MM1, MM6 )            /* x3*m31        | x3*m30        */
    PFACC       ( MM7, MM5 )            /* x3            | x2*m22+x3*m32 */

    PFADD       ( MM6, MM4 )            /* x1*m11+x3*m31 | x0*m00+x3*m30 */
    ADD_L       ( CONST(16), EDX )      /* next r                        */

    MOVQ        ( MM4, REGOFF(-16, EDX) ) /* write r0, r1                */
    MOVQ        ( MM5, REGOFF(-8, EDX) ) /* write r2, r3                 */

    DEC_L       ( ESI )                 /* decrement vertex counter      */
    JNZ         ( LLBL( G3TP3NRR_1 ) )  /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3NRR_2 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Reads only m00, m01, m10, m11, m30 and m31:
 *   r0 = x0*m00 + x1*m10 + x3*m30
 *   r1 = x0*m01 + x1*m11 + x3*m31
 *   r2 = x2
 *   r3 = x3
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
HIDDEN(_mesa_3dnow_transform_points4_2d)
GLNAME( _mesa_3dnow_transform_points4_2d ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_MATRIX, ESI )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L       ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( ESI, ECX )                        /* ECX = matrix */
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TP2R_2 ) )                /* nothing to transform */

    /* loop-invariant matrix elements, kept in MM0-MM2 */
    MOVD        ( REGIND(ECX), MM0 )        /*               | m00           */
    PUNPCKLDQ   ( REGOFF(16, ECX), MM0 )    /* m10           | m00           */

    MOVD        ( REGOFF(4, ECX), MM1 )     /*               | m01           */
    PUNPCKLDQ   ( REGOFF(20, ECX), MM1 )    /* m11           | m01           */

    MOVQ        ( REGOFF(48, ECX), MM2 )    /* m31           | m30           */

ALIGNTEXT16
LLBL( G3TP2R_1 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */

    MOVQ        ( REGIND(EAX), MM3 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM5 ) /* x3            | x2            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    PREFETCH    ( REGIND(EAX) )

    MOVQ        ( MM3, MM4 )            /* x1            | x0            */
    MOVQ        ( MM5, MM6 )            /* x3            | x2            */

    PFMUL       ( MM1, MM4 )            /* x1*m11        | x0*m01        */
    PUNPCKHDQ   ( MM6, MM6 )            /* x3            | x3            */

    PFMUL       ( MM0, MM3 )            /* x1*m10        | x0*m00        */
    ADD_L       ( CONST(16), EDX )      /* next r                        */

    PFACC       ( MM4, MM3 )            /* x0*m01+x1*m11 | x0*m00+x1*m10 */
    PFMUL       ( MM2, MM6 )            /* x3*m31        | x3*m30        */

    PFADD       ( MM6, MM3 )            /* r1            | r0            */
    MOVQ        ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 (x2, x3 copied) */

    MOVQ        ( MM3, REGOFF(-16, EDX) ) /* write r0, r1                */

    DEC_L       ( ESI )                 /* decrement vertex counter      */
    JNZ         ( LLBL( G3TP2R_1 ) )    /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2R_2 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Reads only m00, m11, m30 and m31:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2
 *   r3 = x3
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_MATRIX, ESI )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L       ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( ESI, ECX )                        /* ECX = matrix */
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TP2NRR_3 ) )              /* nothing to transform */

    /* loop-invariant matrix elements, kept in MM0 and MM1 */
    MOVD        ( REGIND(ECX), MM0 )        /*               | m00           */
    PUNPCKLDQ   ( REGOFF(20, ECX), MM0 )    /* m11           | m00           */

    MOVQ        ( REGOFF(48, ECX), MM1 )    /* m31           | m30           */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */

    MOVQ        ( REGIND(EAX), MM4 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM5 ) /* x3            | x2            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    PREFETCH    ( REGIND(EAX) )

    PFMUL       ( MM0, MM4 )            /* x1*m11        | x0*m00        */
    MOVQ        ( MM5, MM6 )            /* x3            | x2            */

    ADD_L       ( CONST(16), EDX )      /* next r                        */
    PUNPCKHDQ   ( MM6, MM6 )            /* x3            | x3            */

    PFMUL       ( MM1, MM6 )            /* x3*m31        | x3*m30        */
    PFADD       ( MM4, MM6 )            /* x1*m11+x3*m31 | x0*m00+x3*m30 */

    MOVQ        ( MM6, REGOFF(-16, EDX) ) /* write r0, r1                */
    MOVQ        ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 (x2, x3 copied) */

    DEC_L       ( ESI )                 /* decrement vertex counter      */

    JNZ         ( LLBL( G3TP2NRR_2 ) )  /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2NRR_3 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies every vertex unchanged: r0..r3 = x0..x3.
 *
 * The matrix is never read, so the matrix argument is not fetched at all
 * and the vertex count is loaded once, straight into its loop register.
 * ECX keeps the destination pointer instead of a matrix pointer; it is
 * not used after the prologue.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
HIDDEN(_mesa_3dnow_transform_points4_identity)
GLNAME( _mesa_3dnow_transform_points4_identity ):

    PUSH_L      ( ESI )

    /* fetch the arguments and fill in the destination header */
    MOV_L       ( ARG_DEST, ECX )
    MOV_L       ( ARG_SOURCE, EAX )
    MOV_L       ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B        ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L       ( REGOFF(V4F_COUNT, EAX), ESI )     /* ESI = vertex count */
    MOV_L       ( ESI, REGOFF(V4F_COUNT, ECX) )

    PUSH_L      ( EDI )

    /* switch the registers over to their loop roles */
    MOV_L       ( REGOFF(V4F_START, ECX), EDX )     /* EDX = dest data */
    MOV_L       ( REGOFF(V4F_STRIDE, EAX), EDI )    /* EDI = source stride */
    MOV_L       ( REGOFF(V4F_START, EAX), EAX )     /* EAX = source data */

    TEST_L      ( ESI, ESI )
    JZ          ( LLBL( G3TPIR_2 ) )                /* nothing to copy */

ALIGNTEXT16
LLBL( G3TPIR_1 ):

    PREFETCHW   ( REGOFF(32, EDX) )     /* prefetch 2 vertices ahead */

    MOVQ        ( REGIND(EAX), MM0 )    /* x1            | x0            */
    MOVQ        ( REGOFF(8, EAX), MM1 ) /* x3            | x2            */

    ADD_L       ( EDI, EAX )            /* next vertex                   */
    PREFETCH    ( REGIND(EAX) )

    ADD_L       ( CONST(16), EDX )      /* next r                        */
    MOVQ        ( MM0, REGOFF(-16, EDX) ) /* r1          | r0            */

    MOVQ        ( MM1, REGOFF(-8, EDX) ) /* r3           | r2            */

    DEC_L       ( ESI )                 /* decrement vertex counter      */
    JNZ         ( LLBL( G3TPIR_1 ) )    /* cnt > 0 ? -> process next vertex */

LLBL( G3TPIR_2 ):

    FEMMS
    POP_L       ( EDI )
    POP_L       ( ESI )
    RET
#endif

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif