/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/** TODO:
 * - insert PREFETCH instructions to avoid cache misses!
 * - some more optimizations are possible...
 * - for 40-50% more performance in the SSE functions, the
 *   data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned!
 */

/*
 * SSE transforms for points that have a single source component (x).
 *
 * Syntax: macro assembler via assyntax.h, 32-bit x86, operands written
 * source first, destination last.
 *
 * All seven routines share the same skeleton:
 *
 *   - ESI and EDI are pushed on entry and popped before RET; the arguments
 *     are then read relative to ESP.  FRAME_OFFSET (8) is the size of those
 *     two pushes and is added to the OFFSET_* argument offsets.
 *     NOTE(review): ARG_MATRIX comes from xform_args.h and presumably uses
 *     FRAME_OFFSET the same way - confirm there.
 *   - If the source count is zero the routine returns at once and the
 *     destination vector is left untouched.
 *   - Otherwise VEC_SIZE_n is OR-ed into the destination flags, the source
 *     count is copied to the destination and the destination size is set
 *     to n, where n is the number of components the routine writes.
 *   - The loop reads one float per source vertex, advances the source
 *     pointer by the source stride and the destination pointer by 16 bytes,
 *     and stops when the destination pointer reaches start + count * 16.
 *
 * Register roles inside the loops:
 *
 *   ESI   current source vertex      (before that: source GLvector4f)
 *   EDI   current destination vertex (before that: dest GLvector4f)
 *   EDX   matrix pointer (the identity routine uses it as copy scratch)
 *   ECX   end-of-destination pointer (before that: vertex count)
 *   EAX   source stride, added directly to ESI, i.e. in bytes
 *   XMMn  scratch; overwritten without being saved
 *
 * The *_skip labels are not referenced anywhere in this file.
 */

#ifdef USE_SSE_ASM
#include "assyntax.h"
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

    SEG_TEXT

/* i-th 32-bit element of the current source vertex, the current
 * destination vertex and the matrix, respectively. */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)


/*
 * _mesa_sse_transform_points1_general
 *
 * Writes four components per vertex:
 *   D(0..3) = ox * (m0, m1, m2, m3) + (m12, m13, m14, m15)
 *
 * The matrix rows are loaded with MOVAPS, which requires a 16-byte
 * aligned memory operand, so the matrix pointer must be 16-byte aligned.
 * The destination is written with MOVUPS and needs no alignment.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_general)
HIDDEN( _mesa_sse_transform_points1_general )
GLNAME( _mesa_sse_transform_points1_general ):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP1GR_finish) )                 /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */


ALIGNTEXT32
    MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0  */
    MOVAPS( M(12), XMM1 )                       /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP1GR_top):
    MOVSS( S(0), XMM2 )                         /* ox */
    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM2 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    ADDPS( XMM1, XMM2 )                         /* +m15  | +m14  | +m13  | +m12  */
    MOVUPS( XMM2, D(0) )

LLBL(K_GTP1GR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP1GR_top) )

LLBL(K_GTP1GR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first 32-bit element of every source vertex to the
 * destination: D(0) = S(0).  The value is moved through EDX as an integer,
 * so its bit pattern is preserved.  No matrix argument is read.
 *
 * If the source and destination data pointers are equal the copy loop is
 * skipped; the destination flags, count and size have already been
 * updated at that point.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
HIDDEN(_mesa_sse_transform_points1_identity)
GLNAME( _mesa_sse_transform_points1_identity ):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP1IR_finish) )                 /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    CMP_L( ESI, EDI )
    JE( LLBL(K_GTP1IR_finish) )                 /* same data pointer: nothing to copy */


ALIGNTEXT32
LLBL(K_GTP1IR_top):
    MOV_L( S(0), EDX )
    MOV_L( EDX, D(0) )

LLBL(K_GTP1IR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP1IR_top) )

LLBL(K_GTP1IR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * Writes three components per vertex:
 *   D(0) = ox * m0 + m12
 *   D(1) = m13
 *   D(2) = m14
 *
 * Only scalar loads are used, so the matrix needs no special alignment.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
GLNAME(_mesa_sse_transform_points1_3d_no_rot):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13DNRR_finish) )              /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */


ALIGNTEXT32
    MOVSS( M(0), XMM0 )                         /* m0 */
    MOVSS( M(12), XMM1 )                        /* m12 */
    MOVSS( M(13), XMM2 )                        /* m13 */
    MOVSS( M(14), XMM3 )                        /* m14 */

ALIGNTEXT32
LLBL(K_GTP13DNRR_top):
    MOVSS( S(0), XMM4 )                         /* ox */
    MULSS( XMM0, XMM4 )                         /* ox*m0 */
    ADDSS( XMM1, XMM4 )                         /* ox*m0+m12 */
    MOVSS( XMM4, D(0) )

    MOVSS( XMM2, D(1) )                         /* m13->D(1) */
    MOVSS( XMM3, D(2) )                         /* m14->D(2) */

LLBL(K_GTP13DNRR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13DNRR_top) )

LLBL(K_GTP13DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_perspective
 *
 * Writes four components per vertex:
 *   D(0) = ox * m0
 *   D(1) = 0
 *   D(2) = m14
 *   D(3) = 0
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
HIDDEN(_mesa_sse_transform_points1_perspective)
GLNAME(_mesa_sse_transform_points1_perspective):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13PR_finish) )                /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */


ALIGNTEXT32
    XORPS( XMM0, XMM0 )                         /* 0 | 0 | 0 | 0 */
    MOVSS( M(0), XMM1 )                         /* m0 */
    MOVSS( M(14), XMM2 )                        /* m14 */

ALIGNTEXT32
LLBL(K_GTP13PR_top):
    MOVSS( S(0), XMM3 )                         /* ox */
    MULSS( XMM1, XMM3 )                         /* ox*m0 */
    MOVSS( XMM3, D(0) )                         /* ox*m0->D(0) */
    MOVSS( XMM2, D(2) )                         /* m14->D(2) */

    MOVSS( XMM0, D(1) )                         /* 0->D(1) */
    MOVSS( XMM0, D(3) )                         /* 0->D(3) */

LLBL(K_GTP13PR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13PR_top) )

LLBL(K_GTP13PR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET


/*
 * _mesa_sse_transform_points1_2d
 *
 * Writes two components per vertex:
 *   D(0) = ox * m0 + m12
 *   D(1) = ox * m1 + m13
 *
 * MOVLPS only loads the low two lanes and leaves the upper two lanes of
 * its destination register unchanged.  XMM0 and XMM1 are therefore zeroed
 * first, so that the packed multiply/add in the loop never operates on
 * stale register contents.  The upper lanes are never stored, so the
 * values written to memory do not depend on this.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
HIDDEN(_mesa_sse_transform_points1_2d)
GLNAME(_mesa_sse_transform_points1_2d):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P2DR_finish) )              /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

ALIGNTEXT32
    XORPS( XMM0, XMM0 )                         /* 0 | 0 | 0   | 0   */
    XORPS( XMM1, XMM1 )                         /* 0 | 0 | 0   | 0   */
    MOVLPS( M(0), XMM0 )                        /* 0 | 0 | m1  | m0  */
    MOVLPS( M(12), XMM1 )                       /* 0 | 0 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P2DR_top):
    MOVSS( S(0), XMM2 )                         /* ox */
    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM2 )                         /* - | - | ox*m1 | ox*m0 */
    ADDPS( XMM1, XMM2 )                         /* - | - | ox*m1+m13 | ox*m0+m12 */
    MOVLPS( XMM2, D(0) )                        /* low two lanes ->D(1) | ->D(0) */

LLBL(K_GTP13P2DR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P2DR_top) )

LLBL(K_GTP13P2DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET


/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * Writes two components per vertex:
 *   D(0) = ox * m0 + m12
 *   D(1) = m13
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
GLNAME(_mesa_sse_transform_points1_2d_no_rot):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P2DNRR_finish) )            /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

ALIGNTEXT32
    MOVSS( M(0), XMM0 )                         /* m0 */
    MOVSS( M(12), XMM1 )                        /* m12 */
    MOVSS( M(13), XMM2 )                        /* m13 */

ALIGNTEXT32
LLBL(K_GTP13P2DNRR_top):
    MOVSS( S(0), XMM3 )                         /* ox */
    MULSS( XMM0, XMM3 )                         /* ox*m0 */
    ADDSS( XMM1, XMM3 )                         /* ox*m0+m12 */
    MOVSS( XMM3, D(0) )
    MOVSS( XMM2, D(1) )                         /* m13->D(1) */

LLBL(K_GTP13P2DNRR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P2DNRR_top) )

LLBL(K_GTP13P2DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_3d
 *
 * Writes three components per vertex:
 *   D(0) = ox * m0 + m12
 *   D(1) = ox * m1 + m13
 *   D(2) = ox * m2 + m14
 *
 * The fourth lane (ox * m3 + m15) is computed but never stored.
 *
 * The matrix rows are loaded with MOVAPS, which requires a 16-byte
 * aligned memory operand, so the matrix pointer must be 16-byte aligned.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
HIDDEN(_mesa_sse_transform_points1_3d)
GLNAME(_mesa_sse_transform_points1_3d):
    _CET_ENDBR
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P3DR_finish) )              /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */


ALIGNTEXT32
    MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0  */
    MOVAPS( M(12), XMM1 )                       /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P3DR_top):
    MOVSS( S(0), XMM2 )                         /* ox */
    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM2 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    ADDPS( XMM1, XMM2 )                         /* +m15 | +m14 | +m13 | +m12 */
    MOVLPS( XMM2, D(0) )                        /* - | - | ->D(1)| ->D(0)*/
    UNPCKHPS( XMM2, XMM2 )                      /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
    MOVSS( XMM2, D(2) )                         /* low lane (ox*m2+m14) ->D(2) */

LLBL(K_GTP13P3DR_skip):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P3DR_top) )

LLBL(K_GTP13P3DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
#endif

#if defined (__ELF__) && defined (__linux__)
    .section .note.GNU-stack,"",%progbits
#endif