
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
 * in there will break the build on some platforms.
 */

/*
 * x87 FPU transforms of 4-component float vectors by a 4x4 float matrix.
 *
 * Every routine in this file follows the same outline:
 *
 *   1. Load the source vector, destination vector and matrix arguments
 *      (ARG_SOURCE, ARG_DEST, ARG_MATRIX) into ESI, EDI and EDX.
 *   2. Read the source count; if it is zero, return without touching the
 *      destination at all (its flags, count and size stay as they were).
 *   3. OR VEC_SIZE_4 into the destination flags, copy the count to the
 *      destination and set the destination size to 4.
 *   4. Replace ESI/EDI by the "start" fields of the source/destination
 *      and walk them: EDI advances by 16 bytes per element, ESI advances
 *      by the source stride (EAX).  ECX holds EDI's end value
 *      (dest start + count * 16); the loop stops when EDI == ECX.
 *
 * Register roles inside the loops:
 *   ESI = current source element      EDI = current destination element
 *   EDX = matrix (16 floats, MAT0..MAT15)
 *   EAX = source stride in bytes      ECX = end-of-destination marker
 *
 * In every loop all reads of the source element happen before the first
 * store to the destination element.
 *
 * The stack comments to the right of the FPU instructions list the x87
 * register stack from ST(0) (leftmost) downwards, using the names F0..F7
 * for the values being accumulated.
 *
 * NOTE(review): the V4F_* and VEC_SIZE_4 names and the ARG_* macros are
 * not defined in this file; presumably they come from
 * "math/m_vector_asm.h" and "xform_args.h" -- confirm.  FRAME_OFFSET is
 * defined per function to the number of bytes pushed in its prologue and
 * is presumably consumed by the ARG_* macros -- confirm.
 */

#include "assyntax.h"
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

	SEG_TEXT

#define FP_ONE		1065353216
#define FP_ZERO		0

/* Components of the current source element (4 bytes each). */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
/* Components of the current destination element (4 bytes each). */
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
/* The sixteen matrix elements (4 bytes each). */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points4_general
 *
 * Uses all sixteen matrix elements.  Per element:
 *   dst0 = src0*MAT0 + src1*MAT4 + src2*MAT8  + src3*MAT12
 *   dst1 = src0*MAT1 + src1*MAT5 + src2*MAT9  + src3*MAT13
 *   dst2 = src0*MAT2 + src1*MAT6 + src2*MAT10 + src3*MAT14
 *   dst3 = src0*MAT3 + src1*MAT7 + src2*MAT11 + src3*MAT15
 *
 * Uses all eight x87 stack slots at its peak.  Preserves ESI and EDI.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME( _mesa_x86_transform_points4_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_gr_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

	/* src0 times matrix elements 0..3 starts the four accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* add src1 times matrix elements 4..7 */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* add src2 times matrix elements 8..11 */
	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT10 )
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT11 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* add src3 times matrix elements 12..15 */
	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT14 )
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT15 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* reorder so the accumulators pop in dst0..dst3 order */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FSTP_S( DST0 )			/* F6 F5 F7 */
	FXCH( ST(1) )			/* F5 F6 F7 */
	FSTP_S( DST1 )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

LLBL(x86_p4_gr_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_perspective
 *
 * Reads only MAT0, MAT5, MAT8, MAT9, MAT10 and MAT14.  Per element:
 *   dst0 = src0*MAT0 + src2*MAT8
 *   dst1 = src1*MAT5 + src2*MAT9
 *   dst2 = src2*MAT10 + src3*MAT14
 *   dst3 = src2 with its sign bit inverted (done with an integer XOR)
 *
 * Preserves ESI, EDI and EBX.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME( _mesa_x86_transform_points4_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_pr_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F0 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
	FMUL_S( MAT14 )

	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* dst3 is built in an integer register: raw bits of src2, bit 31 flipped */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )	/* change sign */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

LLBL(x86_p4_pr_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_3d
 *
 * Does not read MAT3, MAT7, MAT11 or MAT15.  Per element:
 *   dst0 = src0*MAT0 + src1*MAT4 + src2*MAT8  + src3*MAT12
 *   dst1 = src0*MAT1 + src1*MAT5 + src2*MAT9  + src3*MAT13
 *   dst2 = src0*MAT2 + src1*MAT6 + src2*MAT10 + src3*MAT14
 *   dst3 = src3 (copied as a raw 32-bit value through EBX)
 *
 * Preserves ESI, EDI and EBX.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME( _mesa_x86_transform_points4_3d ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dr_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

	/* src0 times matrix elements 0..2 starts the three accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* add src1 times matrix elements 4..6 */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* add src2 times matrix elements 8..10 */
	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* add src3 times matrix elements 12..14 */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* fetch src3 before any destination store */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

LLBL(x86_p4_3dr_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Reads only MAT0, MAT5, MAT10, MAT12, MAT13 and MAT14.  Per element:
 *   dst0 = src0*MAT0  + src3*MAT12
 *   dst1 = src1*MAT5  + src3*MAT13
 *   dst2 = src2*MAT10 + src3*MAT14
 *   dst3 = src3 (copied as a raw 32-bit value through EBX)
 *
 * Preserves ESI, EDI and EBX.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dnrr_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F6 F5 F4 */
	FMUL_S( MAT10 )

	/* add src3 times matrix elements 12..14 */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* fetch src3 before any destination store */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

LLBL(x86_p4_3dnrr_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_2d
 *
 * Reads only MAT0, MAT1, MAT4, MAT5, MAT12 and MAT13.  Per element:
 *   dst0 = src0*MAT0 + src1*MAT4 + src3*MAT12
 *   dst1 = src0*MAT1 + src1*MAT5 + src3*MAT13
 *   dst2 = src2 (copied as a raw 32-bit value through EBX)
 *   dst3 = src3 (copied as a raw 32-bit value through EBP)
 *
 * Preserves ESI, EDI, EBX and EBP.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME( _mesa_x86_transform_points4_2d ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dr_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

	/* src0 times matrix elements 0..1 starts the two accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* add src1 times matrix elements 4..5 */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* add src3 times matrix elements 12..13 */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* fetch the pass-through components before any destination store */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

LLBL(x86_p4_2dr_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Reads only MAT0, MAT5, MAT12 and MAT13.  Per element:
 *   dst0 = src0*MAT0 + src3*MAT12
 *   dst1 = src1*MAT5 + src3*MAT13
 *   dst2 = src2 (copied as a raw 32-bit value through EBX)
 *   dst3 = src3 (copied as a raw 32-bit value through EBP)
 *
 * Preserves ESI, EDI, EBX and EBP.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dnrr_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	/* add src3 times matrix elements 12..13 */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* fetch the pass-through components before any destination store */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

LLBL(x86_p4_2dnrr_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies each source element to the destination as four raw 32-bit
 * values; no FPU instructions are used.  The destination flags, count
 * and size are updated first; the copy loop is then skipped entirely
 * when the source and destination start pointers are equal.
 *
 * The matrix argument is loaded into EDX but never read; EDX is then
 * reused as a copy scratch register alongside EBX.
 *
 * Preserves ESI, EDI and EBX.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME( _mesa_x86_transform_points4_identity ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_ir_done) )		/* count == 0: nothing to do */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source start */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest start */
	ADD_L( EDI, ECX )			/* ECX = end value for EDI */

	CMP_L( ESI, EDI )
	JE( LLBL(x86_p4_ir_done) )		/* same start pointer: no copy */

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EDX )

	MOV_L( EBX, DST2 )
	MOV_L( EDX, DST3 )

LLBL(x86_p4_ir_skip):

	ADD_L( CONST(16), EDI )		/* next destination element */
	ADD_L( EAX, ESI )		/* next source element */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET

/* On Linux/ELF, emit an empty .note.GNU-stack section. */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif