/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
 * in there will break the build on some platforms.
 */

/*
 * Syntax/target: 32-bit x86, written with the assyntax.h macro layer.
 * Operand order throughout is "source, destination".
 * This file must be run through the C preprocessor before assembly; every
 * '#' directive has to start its own line.
 */

#include "assyntax.h"
#define MATH_ASM_PTR_SIZE	4
#include "math/m_vector_asm.h"
#include "clip_args.h"

/*
 * Shorthands for the four consecutive 32-bit words at ESI (current source
 * vertex), EDI (current destination vertex) and EDX.
 * The MAT* shorthands are not referenced by the routines in this file.
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)


/*
 * Table for clip test.
 *
 * 128 one-byte entries (16 rows of 8), indexed by the 7-bit value that the
 * routines below assemble in ECX, most significant bit first:
 *
 *	bit6 = SRC3 < 0
 *	bit5 = SRC2 < 0
 *	bit4 = abs(S(2)) > abs(S(3))
 *	bit3 = SRC1 < 0
 *	bit2 = abs(S(1)) > abs(S(3))
 *	bit1 = SRC0 < 0
 *	bit0 = abs(S(0)) > abs(S(3))
 *
 * Each entry is the clip mask byte stored for that vertex (values in the
 * range 0x00..0x3f).  The meaning of the individual output bits is defined
 * by the clip flag constants used by the callers, which are not visible in
 * this file.
 */

	SEG_DATA

clip_table:
	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
	D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
	D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
	D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
	D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
	D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
	D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
	D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
	D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a


	SEG_TEXT

/*
 * _mesa_x86_cliptest_points4
 *
 * Clip-tests every 4-component vertex of the source vector and writes the
 * perspective-divided result to the destination vector.
 *
 * Arguments are accessed through ARG_SOURCE, ARG_DEST, ARG_CLIP, ARG_OR and
 * ARG_AND from clip_args.h (not shown here; presumably stack slots located
 * via FRAME_OFFSET -- confirm against that header):
 *	ARG_SOURCE	source vector; V4F_STRIDE, V4F_COUNT, V4F_START are read
 *	ARG_DEST	dest vector; V4F_FLAGS, V4F_SIZE, V4F_COUNT are written,
 *			V4F_START is read
 *	ARG_CLIP	byte array receiving one clip mask per vertex
 *	ARG_OR		pointer to a byte; loaded, OR-ed with every mask,
 *			written back
 *	ARG_AND		pointer to a byte; loaded, AND-ed with every mask,
 *			written back
 *
 * Per vertex: if the table mask is zero the destination receives
 * (x/w, y/w, z/w, 1/w); otherwise it receives (0, 0, 0, 1.0f).
 * The destination advances by a fixed 16 bytes per vertex, the source by
 * the stride read from the source vector.
 *
 * Returns ARG_DEST in EAX.  ESI, EDI, EBP and EBX are saved and restored.
 * When the vertex count is zero the or/and bytes are not written back.
 * The ARG_SOURCE and ARG_CLIP slots are overwritten as scratch storage.
 *
 * Register roles inside the loop:
 *	AL:  ormask
 *	AH:  andmask
 *	EBX: temp0
 *	ECX: temp1 (table index, then the mask in CL)
 *	EDX: clipmask[]
 *	ESI: clip[]
 *	EDI: proj[]
 *	EBP: temp2
 */

#if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC)
#define ELFPIC
#endif

ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_cliptest_points4 )
HIDDEN(_mesa_x86_cliptest_points4)
GLNAME( _mesa_x86_cliptest_points4 ):
	_CET_ENDBR
/*
 * FRAME_OFFSET = bytes pushed below the return address by this prologue:
 * four saved registers, plus the clip_table pointer when ELFPIC is defined.
 */
#ifdef ELFPIC
#define FRAME_OFFSET 20
#else
#define FRAME_OFFSET 16
#endif
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBP )
	PUSH_L( EBX )

#ifdef ELFPIC
	/* store pointer to clip_table on stack */
	CALL( LLBL(ctp4_get_eip) )
	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
	PUSH_L( EBX )
	JMP( LLBL(ctp4_clip_table_ready) )

LLBL(ctp4_get_eip):
	/* store eip in ebx */
	MOV_L( REGIND(ESP), EBX )
	RET

LLBL(ctp4_clip_table_ready):
#endif

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_CLIP, EDX )
	MOV_L( ARG_OR, EBX )

	MOV_L( ARG_AND, EBP )
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( EAX, ARG_SOURCE )	/* put stride in ARG_SOURCE */

	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDX, ECX )

	MOV_L( ECX, ARG_CLIP )		/* put clipmask + count in ARG_CLIP */
	CMP_L( ECX, EDX )		/* ZF set when count == 0; MOVs below keep flags */

	MOV_B( REGIND(EBX), AL )
	MOV_B( REGIND(EBP), AH )

	JZ( LLBL(ctp4_finish) )

ALIGNTEXT16
LLBL(ctp4_top):

	FLD1				/* F3 */
	FDIV_S( SRC3 )			/* GH: don't care about div-by-zero */

	/*
	 * Build the 7-bit clip_table index in ECX.  The float components are
	 * loaded as raw 32-bit integers: doubling one shifts its sign bit
	 * into carry and leaves the magnitude bits, which are then compared
	 * as unsigned integers.  Each ADC shifts ECX left and appends carry.
	 */
	MOV_L( SRC3, EBP )
	MOV_L( SRC2, EBX )

	XOR_L( ECX, ECX )
	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */

	ADC_L( ECX, ECX )
	ADD_L( EBX, EBX )	/* ebx = abs(S(2))*2 ; carry = sign of S(2) */

	ADC_L( ECX, ECX )
	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */

	ADC_L( ECX, ECX )
	MOV_L( SRC1, EBX )

	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */

	ADC_L( ECX, ECX )
	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */

	ADC_L( ECX, ECX )
	MOV_L( SRC0, EBX )

	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */

	ADC_L( ECX, ECX )
	CMP_L( EBX, EBP )	/* carry = abs(S(0))*2 > abs(S(3))*2 */

	ADC_L( ECX, ECX )

#ifdef ELFPIC
	MOV_L( REGIND(ESP), EBP )	/* clip_table */

	MOV_B( REGBI(EBP, ECX), CL )
#else
	MOV_B( REGOFF(clip_table,ECX), CL )
#endif

	OR_B( CL, AL )
	AND_B( CL, AH )

	TEST_B( CL, CL )		/* ZF = (mask == 0); MOV keeps flags */
	MOV_B( CL, REGIND(EDX) )

	JZ( LLBL(ctp4_proj) )

LLBL(ctp4_noproj):

	/* mask != 0: drop 1/w from the FPU stack, store (0, 0, 0, 1.0f) */
	FSTP( ST(0) )			/* */

	MOV_L( CONST(0), DST0 )
	MOV_L( CONST(0), DST1 )
	MOV_L( CONST(0), DST2 )
	MOV_L( CONST(0x3f800000), DST3 )

	JMP( LLBL(ctp4_next) )

LLBL(ctp4_proj):

	/* mask == 0: store (x/w, y/w, z/w, 1/w); F3 below is 1/w */
	FLD_S( SRC0 )			/* F0 F3 */
	FMUL2( ST(1), ST0 )

	FLD_S( SRC1 )			/* F1 F0 F3 */
	FMUL2( ST(2), ST0 )

	FLD_S( SRC2 )			/* F2 F1 F0 F3 */
	FMUL2( ST(3), ST0 )

	FXCH( ST(2) )			/* F0 F1 F2 F3 */
	FSTP_S( DST0 )			/* F1 F2 F3 */
	FSTP_S( DST1 )			/* F2 F3 */
	FSTP_S( DST2 )			/* F3 */
	FSTP_S( DST3 )			/* */

LLBL(ctp4_next):

	INC_L( EDX )
	ADD_L( CONST(16), EDI )

	ADD_L( ARG_SOURCE, ESI )	/* ARG_SOURCE holds the source stride */
	CMP_L( EDX, ARG_CLIP )		/* ARG_CLIP holds clipmask + count */

	JNZ( LLBL(ctp4_top) )

	MOV_L( ARG_OR, ECX )
	MOV_L( ARG_AND, EDX )

	MOV_B( AL, REGIND(ECX) )
	MOV_B( AH, REGIND(EDX) )

LLBL(ctp4_finish):

	MOV_L( ARG_DEST, EAX )
#ifdef ELFPIC
	POP_L( ESI )			/* discard ptr to clip_table */
#endif
	POP_L( EBX )
	POP_L( EBP )
	POP_L( EDI )
	POP_L( ESI )

	RET


/*
 * _mesa_x86_cliptest_points4_np
 *
 * Same clip test as _mesa_x86_cliptest_points4, but without the
 * perspective divide: only the per-vertex clip masks and the or/and bytes
 * are written; no destination vertex data is stored.
 *
 * Uses the same ARG_* argument macros.  The ARG_DEST slot is overwritten
 * with the source stride and is not otherwise used.
 *
 * Returns ARG_SOURCE in EAX.  ESI, EDI, EBP and EBX are saved and restored.
 * When the vertex count is zero the or/and bytes are not written back.
 *
 * Register roles inside the loop:
 *	AL:  ormask
 *	AH:  andmask
 *	EBX: temp0
 *	ECX: temp1 (table index, then the mask in CL)
 *	EDX: clipmask[]
 *	ESI: clip[]
 *	EDI: clipmask + count (loop end)
 *	EBP: temp2
 */

ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_cliptest_points4_np )
HIDDEN(_mesa_x86_cliptest_points4_np)
GLNAME( _mesa_x86_cliptest_points4_np ):
	_CET_ENDBR
/* Same values as in the routine above; an identical redefinition. */
#ifdef ELFPIC
#define FRAME_OFFSET 20
#else
#define FRAME_OFFSET 16
#endif
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBP )
	PUSH_L( EBX )

#ifdef ELFPIC
	/* store pointer to clip_table on stack */
	CALL( LLBL(ctp4_np_get_eip) )
	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
	PUSH_L( EBX )
	JMP( LLBL(ctp4_np_clip_table_ready) )

LLBL(ctp4_np_get_eip):
	/* store eip in ebx */
	MOV_L( REGIND(ESP), EBX )
	RET

LLBL(ctp4_np_clip_table_ready):
#endif

	MOV_L( ARG_SOURCE, ESI )
	/* slot */

	MOV_L( ARG_CLIP, EDX )
	MOV_L( ARG_OR, EBX )

	MOV_L( ARG_AND, EBP )
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( EAX, ARG_DEST )		/* put stride in ARG_DEST */
	ADD_L( EDX, ECX )

	MOV_L( ECX, EDI )		/* put clipmask + count in EDI */
	CMP_L( ECX, EDX )		/* ZF set when count == 0; MOVs below keep flags */

	MOV_B( REGIND(EBX), AL )
	MOV_B( REGIND(EBP), AH )

	JZ( LLBL(ctp4_np_finish) )

ALIGNTEXT16
LLBL(ctp4_np_top):

	/*
	 * Build the 7-bit clip_table index in ECX.  The float components are
	 * loaded as raw 32-bit integers: doubling one shifts its sign bit
	 * into carry and leaves the magnitude bits, which are then compared
	 * as unsigned integers.  Each ADC shifts ECX left and appends carry.
	 */
	MOV_L( SRC3, EBP )
	MOV_L( SRC2, EBX )

	XOR_L( ECX, ECX )
	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */

	ADC_L( ECX, ECX )
	ADD_L( EBX, EBX )	/* ebx = abs(S(2))*2 ; carry = sign of S(2) */

	ADC_L( ECX, ECX )
	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */

	ADC_L( ECX, ECX )
	MOV_L( SRC1, EBX )

	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */

	ADC_L( ECX, ECX )
	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */

	ADC_L( ECX, ECX )
	MOV_L( SRC0, EBX )

	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */

	ADC_L( ECX, ECX )
	CMP_L( EBX, EBP )	/* carry = abs(S(0))*2 > abs(S(3))*2 */

	ADC_L( ECX, ECX )

#ifdef ELFPIC
	MOV_L( REGIND(ESP), EBP )	/* clip_table */

	MOV_B( REGBI(EBP, ECX), CL )
#else
	MOV_B( REGOFF(clip_table,ECX), CL )
#endif

	OR_B( CL, AL )
	AND_B( CL, AH )

	/*
	 * NOTE(review): the flags set by TEST_B are not consumed in this
	 * variant; INC/ADD/CMP below overwrite them before the JNZ.
	 */
	TEST_B( CL, CL )
	MOV_B( CL, REGIND(EDX) )

	INC_L( EDX )
	/* slot */

	ADD_L( ARG_DEST, ESI )		/* ARG_DEST holds the source stride */
	CMP_L( EDX, EDI )

	JNZ( LLBL(ctp4_np_top) )

	MOV_L( ARG_OR, ECX )
	MOV_L( ARG_AND, EDX )

	MOV_B( AL, REGIND(ECX) )
	MOV_B( AH, REGIND(EDX) )

LLBL(ctp4_np_finish):

	MOV_L( ARG_SOURCE, EAX )
#ifdef ELFPIC
	POP_L( ESI )			/* discard ptr to clip_table */
#endif
	POP_L( EBX )
	POP_L( EBP )
	POP_L( EDI )
	POP_L( ESI )

	RET

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif