/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
 * in there will break the build on some platforms.
 */

/*
 * x87 routines that transform an array of 2-component points (only
 * source elements 0 and 1 are read) by a matrix of 32-bit floats.
 *
 * Every routine below follows the same skeleton:
 *
 *   - ESI = ARG_SOURCE, EDI = ARG_DEST, EDX = ARG_MATRIX.
 *   - ECX = source count.  If it is zero the routine returns at once and
 *     the destination descriptor is left untouched.
 *   - VEC_SIZE_n is ORed into the destination flags, the count is copied
 *     to the destination and n is stored as the destination size.
 *   - EAX = source stride in bytes, ESI = source start, EDI = destination
 *     start, ECX = destination start + count * 16.  Destination elements
 *     are always written 16 bytes apart; the loop ends when EDI == ECX.
 *
 * Register usage: ESI and EDI (and EBX where it is used) are pushed on
 * entry and popped on exit; EAX, ECX and EDX are used as scratch.  Each
 * loop iteration pops every x87 value it pushes, so the x87 stack depth
 * on return equals the depth on entry.
 *
 * FRAME_OFFSET is set to the number of bytes pushed by each prologue.
 * NOTE(review): presumably the ARG_* macros in xform_args.h use it to
 * locate the stack arguments -- confirm there before changing a prologue.
 *
 * The stack comments next to the x87 instructions list the register
 * stack top-first.  F4..F7 name the accumulators that end up in
 * destination elements 0..3, F0..F3 name the source[1] partial products.
 *
 * The x86_p2_*_skip labels are not branched to anywhere in this file.
 */

#include "assyntax.h"
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

    SEG_TEXT

/* Raw IEEE-754 single precision bit patterns (0x3F800000 = 1.0f, 0 = 0.0f). */
/* FP_ONE is not referenced in this file. */
#define FP_ONE 1065353216
#define FP_ZERO 0

/* 32-bit elements 0..3 of the current source point (ESI). */
#define SRC0 REGOFF(0, ESI)
#define SRC1 REGOFF(4, ESI)
#define SRC2 REGOFF(8, ESI)
#define SRC3 REGOFF(12, ESI)
/* 32-bit elements 0..3 of the current destination point (EDI). */
#define DST0 REGOFF(0, EDI)
#define DST1 REGOFF(4, EDI)
#define DST2 REGOFF(8, EDI)
#define DST3 REGOFF(12, EDI)
/* MATn is the n-th 32-bit matrix element, at byte offset 4*n from EDX. */
#define MAT0 REGOFF(0, EDX)
#define MAT1 REGOFF(4, EDX)
#define MAT2 REGOFF(8, EDX)
#define MAT3 REGOFF(12, EDX)
#define MAT4 REGOFF(16, EDX)
#define MAT5 REGOFF(20, EDX)
#define MAT6 REGOFF(24, EDX)
#define MAT7 REGOFF(28, EDX)
#define MAT8 REGOFF(32, EDX)
#define MAT9 REGOFF(36, EDX)
#define MAT10 REGOFF(40, EDX)
#define MAT11 REGOFF(44, EDX)
#define MAT12 REGOFF(48, EDX)
#define MAT13 REGOFF(52, EDX)
#define MAT14 REGOFF(56, EDX)
#define MAT15 REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points2_general
 *
 * Per point, with s = source and m = matrix:
 *   dst[0] = s[0]*m[0] + s[1]*m[4] + m[12]
 *   dst[1] = s[0]*m[1] + s[1]*m[5] + m[13]
 *   dst[2] = s[0]*m[2] + s[1]*m[6] + m[14]
 *   dst[3] = s[0]*m[3] + s[1]*m[7] + m[15]
 * Destination is marked VEC_SIZE_4 / size 4.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
GLNAME( _mesa_x86_transform_points2_general ):

/* Two registers pushed below. */
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_gr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p2_gr_loop):

    /* Partial products of s[0] with m[0..3]. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )
    FLD_S( SRC0 )                       /* F7 F6 F5 F4 */
    FMUL_S( MAT3 )

    /* Partial products of s[1] with m[4..7]. */
    FLD_S( SRC1 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT6 )
    FLD_S( SRC1 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT7 )

    /* Fold F0..F3 into F4..F7. */
    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* Add m[12..15]. */
    FXCH( ST(3) )                       /* F4 F6 F5 F7 */
    FADD_S( MAT12 )
    FXCH( ST(2) )                       /* F5 F6 F4 F7 */
    FADD_S( MAT13 )
    FXCH( ST(1) )                       /* F6 F5 F4 F7 */
    FADD_S( MAT14 )
    FXCH( ST(3) )                       /* F7 F5 F4 F6 */
    FADD_S( MAT15 )

    /* Store the four results and empty the x87 stack. */
    FXCH( ST(2) )                       /* F4 F5 F7 F6 */
    FSTP_S( DST0 )                      /* F5 F7 F6 */
    FSTP_S( DST1 )                      /* F7 F6 */
    FXCH( ST(1) )                       /* F6 F7 */
    FSTP_S( DST2 )                      /* F7 */
    FSTP_S( DST3 )                      /* */

LLBL(x86_p2_gr_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_perspective
 *
 * Per point:
 *   dst[0] = s[0]*m[0]
 *   dst[1] = s[1]*m[5]
 *   dst[2] = m[14]      (raw 32-bit copy, read once before the loop)
 *   dst[3] = FP_ZERO    (bit pattern 0)
 * Destination is marked VEC_SIZE_4 / size 4.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
GLNAME( _mesa_x86_transform_points2_perspective ):

/* Three registers pushed below. */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_pr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

    MOV_L( MAT14, EBX )                 /* loop-invariant dst[2] value */

ALIGNTEXT16
LLBL(x86_p2_pr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F1 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F4 F1 */
    FSTP_S( DST0 )                      /* F1 */
    FSTP_S( DST1 )                      /* */
    MOV_L( EBX, DST2 )
    MOV_L( CONST(FP_ZERO), DST3 )

LLBL(x86_p2_pr_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d
 *
 * Per point:
 *   dst[0] = s[0]*m[0] + s[1]*m[4] + m[12]
 *   dst[1] = s[0]*m[1] + s[1]*m[5] + m[13]
 *   dst[2] = s[0]*m[2] + s[1]*m[6] + m[14]
 * dst[3] is not written.  Destination is marked VEC_SIZE_3 / size 3.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
GLNAME( _mesa_x86_transform_points2_3d ):

/* Two registers pushed below. */
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_3dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

    /* Partial products of s[0] with m[0..2]. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )

    /* Partial products of s[1] with m[4..6]. */
    FLD_S( SRC1 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT6 )

    /* Fold F0..F2 into F4..F6. */
    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* Add m[12..14]. */
    FXCH( ST(2) )                       /* F4 F5 F6 */
    FADD_S( MAT12 )
    FXCH( ST(1) )                       /* F5 F4 F6 */
    FADD_S( MAT13 )
    FXCH( ST(2) )                       /* F6 F4 F5 */
    FADD_S( MAT14 )

    /* Store the three results and empty the x87 stack. */
    FXCH( ST(1) )                       /* F4 F6 F5 */
    FSTP_S( DST0 )                      /* F6 F5 */
    FXCH( ST(1) )                       /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */

LLBL(x86_p2_3dr_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Per point:
 *   dst[0] = s[0]*m[0] + m[12]
 *   dst[1] = s[1]*m[5] + m[13]
 *   dst[2] = m[14]      (raw 32-bit copy, read once before the loop)
 * dst[3] is not written.  Destination is marked VEC_SIZE_3 / size 3.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

/* Three registers pushed below. */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_3dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

    MOV_L( MAT14, EBX )                 /* loop-invariant dst[2] value */

ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F1 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F4 F1 */
    FADD_S( MAT12 )
    FLD_S( MAT13 )                      /* F5 F4 F1 */
    FXCH( ST(2) )                       /* F1 F4 F5 */
    FADDP( ST0, ST(2) )                 /* F4 F5 */

    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */
    MOV_L( EBX, DST2 )

LLBL(x86_p2_3dnrr_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d
 *
 * Per point:
 *   dst[0] = s[0]*m[0] + s[1]*m[4] + m[12]
 *   dst[1] = s[0]*m[1] + s[1]*m[5] + m[13]
 * dst[2] and dst[3] are not written.  Destination is marked
 * VEC_SIZE_2 / size 2.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
GLNAME( _mesa_x86_transform_points2_2d ):

/* Two registers pushed below. */
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_2dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

    /* Partial products of s[0] with m[0..1]. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )

    /* Partial products of s[1] with m[4..5]. */
    FLD_S( SRC1 )                       /* F0 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT5 )

    /* Fold F0..F1 into F4..F5. */
    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    /* Add m[12..13]. */
    FXCH( ST(1) )                       /* F4 F5 */
    FADD_S( MAT12 )
    FXCH( ST(1) )                       /* F5 F4 */
    FADD_S( MAT13 )

    /* Store the two results and empty the x87 stack. */
    FXCH( ST(1) )                       /* F4 F5 */
    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */

LLBL(x86_p2_2dr_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Per point:
 *   dst[0] = s[0]*m[0] + m[12]
 *   dst[1] = s[1]*m[5] + m[13]
 * dst[2] and dst[3] are not written.  Destination is marked
 * VEC_SIZE_2 / size 2.
 */
/* Entry aligned to 16 bytes like every other entry point in this file. */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

/* Two registers pushed below. */
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_2dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F1 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F4 F1 */
    FADD_S( MAT12 )
    FLD_S( MAT13 )                      /* F5 F4 F1 */
    FXCH( ST(2) )                       /* F1 F4 F5 */
    FADDP( ST0, ST(2) )                 /* F4 F5 */

    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */

LLBL(x86_p2_2dnrr_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies source elements 0 and 1 to the destination as raw 32-bit
 * values; no x87 instructions are used.  The copy loop is skipped when
 * the source and destination start pointers are equal.  The matrix is
 * never dereferenced: EDX is loaded from ARG_MATRIX and then reused as
 * copy scratch inside the loop.
 * dst[2] and dst[3] are not written.  Destination is marked
 * VEC_SIZE_2 / size 2.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
GLNAME( _mesa_x86_transform_points2_identity ):

/* Three registers pushed below. */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for count == 0 */
    JZ( LLBL(x86_p2_ir_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per dest point */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past the last dest point */

    CMP_L( ESI, EDI )                   /* same start pointer: no copy needed */
    JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

    MOV_L( SRC0, EBX )
    MOV_L( SRC1, EDX )

    MOV_L( EBX, DST0 )
    MOV_L( EDX, DST1 )

LLBL(x86_p2_ir_skip):

    ADD_L( CONST(16), EDI )             /* next dest point */
    ADD_L( EAX, ESI )                   /* next source point (source stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET

/* Emit an empty .note.GNU-stack section so that GNU linkers do not mark */
/* the stack executable because of this object. */
#if defined (__ELF__) && defined (__linux__)
    .section .note.GNU-stack,"",%progbits
#endif