/*
 * Mesa 3-D graphics library
 * Version:  3.5
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */

/*
 * x87 transforms of 4-component float points, one routine per matrix shape.
 *
 * All seven routines share the same skeleton and register roles:
 *
 *   ESI  source vector descriptor, then the source data pointer (V4F_START)
 *   EDI  dest vector descriptor, then the dest data pointer (V4F_START)
 *   EDX  matrix pointer; MATn below is the float at byte offset 4*n
 *        (the identity routine reuses EDX as copy scratch instead)
 *   EAX  source stride in bytes (V4F_STRIDE); added to ESI every iteration
 *   ECX  source element count, then the dest end pointer
 *        (dest start + count * 16)
 *   EBX, EBP  integer scratch for components that are copied unmodified
 *
 * Every register a routine pushes is popped again before RET.  FRAME_OFFSET
 * is set to the number of bytes pushed by the prologue; the ARG_* macros
 * come from xform_args.h (not visible here) and presumably use it to find
 * the stack arguments -- confirm there before changing a prologue.
 *
 * When the source count is zero the routine returns without touching the
 * destination descriptor.  Otherwise the destination gets VEC_SIZE_4 or'ed
 * into its flags, its count set to the source count and its size set to 4.
 * Output elements are always written 16 bytes apart.
 *
 * The F0..F7 annotations track the x87 register stack, leftmost = ST(0).
 * F4..F7 are the accumulators for the x, y, z, w results; F0..F3 are
 * products that are folded into them with FADDP.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

#define FP_ONE  1065353216
#define FP_ZERO 0

/* Components of the current source point (ESI) and dest point (EDI). */
#define SRC0    REGOFF(0, ESI)
#define SRC1    REGOFF(4, ESI)
#define SRC2    REGOFF(8, ESI)
#define SRC3    REGOFF(12, ESI)
#define DST0    REGOFF(0, EDI)
#define DST1    REGOFF(4, EDI)
#define DST2    REGOFF(8, EDI)
#define DST3    REGOFF(12, EDI)

/* The sixteen matrix floats, addressed off EDX. */
#define MAT0    REGOFF(0, EDX)
#define MAT1    REGOFF(4, EDX)
#define MAT2    REGOFF(8, EDX)
#define MAT3    REGOFF(12, EDX)
#define MAT4    REGOFF(16, EDX)
#define MAT5    REGOFF(20, EDX)
#define MAT6    REGOFF(24, EDX)
#define MAT7    REGOFF(28, EDX)
#define MAT8    REGOFF(32, EDX)
#define MAT9    REGOFF(36, EDX)
#define MAT10   REGOFF(40, EDX)
#define MAT11   REGOFF(44, EDX)
#define MAT12   REGOFF(48, EDX)
#define MAT13   REGOFF(52, EDX)
#define MAT14   REGOFF(56, EDX)
#define MAT15   REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points4_general
 *
 * Full 4x4 transform.  For each source point s:
 *   dst[0] = s0*MAT0 + s1*MAT4 + s2*MAT8  + s3*MAT12
 *   dst[1] = s0*MAT1 + s1*MAT5 + s2*MAT9  + s3*MAT13
 *   dst[2] = s0*MAT2 + s1*MAT6 + s2*MAT10 + s3*MAT14
 *   dst[3] = s0*MAT3 + s1*MAT7 + s2*MAT11 + s3*MAT15
 *
 * Uses all eight x87 stack slots at the peak of each accumulation step.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME( _mesa_x86_transform_points4_general ):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_gr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

    /* accumulators = s0 * column 0 */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )
    FLD_S( SRC0 )                       /* F7 F6 F5 F4 */
    FMUL_S( MAT3 )

    /* += s1 * column 1 */
    FLD_S( SRC1 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT6 )
    FLD_S( SRC1 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT7 )

    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* += s2 * column 2 */
    FLD_S( SRC2 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT10 )
    FLD_S( SRC2 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT11 )

    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* += s3 * column 3 */
    FLD_S( SRC3 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT13 )
    FLD_S( SRC3 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT14 )
    FLD_S( SRC3 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT15 )

    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* reorder so the results pop in x, y, z, w order */
    FXCH( ST(3) )                       /* F4 F6 F5 F7 */
    FSTP_S( DST0 )                      /* F6 F5 F7 */
    FXCH( ST(1) )                       /* F5 F6 F7 */
    FSTP_S( DST1 )                      /* F6 F7 */
    FSTP_S( DST2 )                      /* F7 */
    FSTP_S( DST3 )                      /* */

LLBL(x86_p4_gr_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_perspective
 *
 * Reads only MAT0, MAT5, MAT8, MAT9, MAT10 and MAT14.  For each point s:
 *   dst[0] = s0*MAT0 + s2*MAT8
 *   dst[1] = s1*MAT5 + s2*MAT9
 *   dst[2] = s2*MAT10 + s3*MAT14
 *   dst[3] = -s2      (sign bit flipped in an integer register)
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME( _mesa_x86_transform_points4_perspective ):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_pr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    FLD_S( SRC2 )                       /* F0 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F6 F1 F0 F5 F4 */
    FMUL_S( MAT10 )

    FXCH( ST(2) )                       /* F0 F1 F6 F5 F4 */
    FADDP( ST0, ST(4) )                 /* F1 F6 F5 F4 */
    FADDP( ST0, ST(2) )                 /* F6 F5 F4 */

    FLD_S( SRC3 )                       /* F2 F6 F5 F4 */
    FMUL_S( MAT14 )

    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* w = -z: fetch the raw bits of s2 before any store and flip bit 31 */
    MOV_L( SRC2, EBX )
    XOR_L( CONST(-2147483648), EBX )    /* change sign */

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */
    MOV_L( EBX, DST3 )

LLBL(x86_p4_pr_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_3d
 *
 * Reads MAT0-2, MAT4-6, MAT8-10 and MAT12-14.  For each point s:
 *   dst[0] = s0*MAT0 + s1*MAT4 + s2*MAT8  + s3*MAT12
 *   dst[1] = s0*MAT1 + s1*MAT5 + s2*MAT9  + s3*MAT13
 *   dst[2] = s0*MAT2 + s1*MAT6 + s2*MAT10 + s3*MAT14
 *   dst[3] = s3       (copied bit-for-bit through EBX)
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME( _mesa_x86_transform_points4_3d ):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_3dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

    /* accumulators = s0 * column 0 */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )

    /* += s1 * column 1 */
    FLD_S( SRC1 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT6 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* += s2 * column 2 */
    FLD_S( SRC2 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT10 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* += s3 * column 3 */
    FLD_S( SRC3 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT13 )
    FLD_S( SRC3 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT14 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    MOV_L( SRC3, EBX )                  /* s3 is read before any dest store */

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */
    MOV_L( EBX, DST3 )

LLBL(x86_p4_3dr_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Reads only MAT0, MAT5, MAT10 and MAT12-14.  For each point s:
 *   dst[0] = s0*MAT0  + s3*MAT12
 *   dst[1] = s1*MAT5  + s3*MAT13
 *   dst[2] = s2*MAT10 + s3*MAT14
 *   dst[3] = s3       (copied bit-for-bit through EBX)
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_3dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    FLD_S( SRC2 )                       /* F6 F5 F4 */
    FMUL_S( MAT10 )

    FLD_S( SRC3 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT13 )
    FLD_S( SRC3 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT14 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    MOV_L( SRC3, EBX )                  /* s3 is read before any dest store */

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */
    MOV_L( EBX, DST3 )

LLBL(x86_p4_3dnrr_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_2d
 *
 * Reads only MAT0, MAT1, MAT4, MAT5, MAT12 and MAT13.  For each point s:
 *   dst[0] = s0*MAT0 + s1*MAT4 + s3*MAT12
 *   dst[1] = s0*MAT1 + s1*MAT5 + s3*MAT13
 *   dst[2] = s2       (copied bit-for-bit through EBX)
 *   dst[3] = s3       (copied bit-for-bit through EBP)
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME( _mesa_x86_transform_points4_2d ):

#define FRAME_OFFSET 16
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )
    PUSH_L( EBP )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_2dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )

    FLD_S( SRC1 )                       /* F0 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    FLD_S( SRC3 )                       /* F0 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT13 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    /* s2 and s3 pass through; both are read before any dest store */
    MOV_L( SRC2, EBX )
    MOV_L( SRC3, EBP )

    FXCH( ST(1) )                       /* F4 F5 */
    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */
    MOV_L( EBX, DST2 )
    MOV_L( EBP, DST3 )

LLBL(x86_p4_2dr_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

    POP_L( EBP )
    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Reads only MAT0, MAT5, MAT12 and MAT13.  For each point s:
 *   dst[0] = s0*MAT0 + s3*MAT12
 *   dst[1] = s1*MAT5 + s3*MAT13
 *   dst[2] = s2       (copied bit-for-bit through EBX)
 *   dst[3] = s3       (copied bit-for-bit through EBP)
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):

#define FRAME_OFFSET 16
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )
    PUSH_L( EBP )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_2dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    FLD_S( SRC3 )                       /* F0 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT13 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    /* s2 and s3 pass through; both are read before any dest store */
    MOV_L( SRC2, EBX )
    MOV_L( SRC3, EBP )

    FXCH( ST(1) )                       /* F4 F5 */
    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */
    MOV_L( EBX, DST2 )
    MOV_L( EBP, DST3 )

LLBL(x86_p4_2dnrr_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

    POP_L( EBP )
    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies every source point to the destination unchanged, as four raw
 * 32-bit words.  The matrix is never read, so EDX is free and serves as
 * the second copy scratch register next to EBX.
 *
 * The destination descriptor is updated whenever the source count is
 * non-zero; the copy itself is skipped when the source and destination
 * data pointers are equal.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME( _mesa_x86_transform_points4_identity ):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p4_ir_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* ECX = count * 16 bytes of output */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

    CMP_L( ESI, EDI )                   /* same data pointer: nothing to copy */
    JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

    MOV_L( SRC0, EBX )
    MOV_L( SRC1, EDX )

    MOV_L( EBX, DST0 )
    MOV_L( EDX, DST1 )

    MOV_L( SRC2, EBX )
    MOV_L( SRC3, EDX )

    MOV_L( EBX, DST2 )
    MOV_L( EDX, DST3 )

LLBL(x86_p4_ir_skip):

    ADD_L( CONST(16), EDI )             /* dest points are packed */
    ADD_L( EAX, ESI )                   /* source advances by its stride */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET

#if defined (__ELF__) && defined (__linux__)
    .section .note.GNU-stack,"",%progbits
#endif