/*
 * Clip testing in SPARC assembly
 *
 * Syntax: GNU as for SPARC, run through the C preprocessor.
 * Exports two routines with the C-level argument list documented further
 * down.  Both use a register window (save/restore), so the incoming
 * arguments are seen in %i0-%i5.
 */

/* Pointer-sized load and pointer size, used by the structure offsets
 * (V4F_*) that come from math/m_vector_asm.h.
 */
#if __arch64__
#define LDPTR			ldx
#define MATH_ASM_PTR_SIZE	8
#else
#define LDPTR			ld
#define MATH_ASM_PTR_SIZE	4
#endif
#include "math/m_vector_asm.h"

#define VEC_SIZE_1   1
#define VEC_SIZE_2   3
#define VEC_SIZE_3   7
#define VEC_SIZE_4   15

	.register %g2, #scratch
	.register %g3, #scratch

	.text
	.align		64

	/* one_dot_zero must stay immediately in front of clip_table:
	 * the code below derives the table address as one_dot_zero + 4.
	 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The table has 128 one-byte entries and is indexed by a 7-bit
	 * value that the loops below assemble from carry bits, most
	 * significant bit first:
	 *
	 *	bit 6: sign bit of w
	 *	bit 5: sign bit of z
	 *	bit 4: magnitude bits of w below magnitude bits of z
	 *	bit 3: sign bit of y
	 *	bit 2: magnitude bits of w below magnitude bits of y
	 *	bit 1: sign bit of x
	 *	bit 0: magnitude bits of w below magnitude bits of x
	 *
	 * ("magnitude bits" = the 32-bit float pattern shifted left by one,
	 * compared as an unsigned integer).  The entry read from the table
	 * is the clip mask byte for that vertex.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

/* Arguments of both entry points, in order (%i0 .. %i5 after save):
 *
 *	GLvector4f *clip_vec, GLvector4f *proj_vec,
 *	GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
 *	GLboolean viewport_z_enable
 *
 * viewport_z_enable is never read by either routine.
 * Both routines return proj_vec in %o0.
 */

	.align		64

	/* Leaf routine that returns immediately.  It exists only so that
	 * "call __pc_tramp" leaves the address of the call instruction in
	 * %o7, which the delay slot then turns into the address of
	 * one_dot_zero without needing an absolute relocation.
	 */
__pc_tramp:
	retl
	 nop

/*
 * _mesa_sparc_cliptest_points4
 *
 * For each of clip_vec's `count` vertices (4 floats each, `stride` bytes
 * apart): look up the clip mask and store it in clipMask[i].
 *   - mask != 0: count the vertex in c, fold the mask into the running
 *     or/and masks and store (0, 0, 0, 1.0) to the output vertex.
 *   - mask == 0: store (x/w, y/w, z/w, 1/w) to the output vertex.
 * Afterwards *orMask receives the accumulated or-mask and *andMask receives
 * the accumulated and-mask, or 0 when c < count.
 *
 * proj_vec's flags get VEC_SIZE_4 or'ed in, its size is set to 3 and its
 * count is copied from clip_vec before the loop runs.
 *
 * NOTE(review): the register-window frame is 64 bytes on both ABIs; confirm
 * that this is sufficient for the window save area when __arch64__ is set.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save	%sp, -64, %sp

	/* %o7 = address of the call; "." below is the address of the delay
	 * slot (call + 4), so the subtraction yields &one_dot_zero.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		! f4 = 1.0f
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	LDPTR	[%i1 + V4F_START], %i5
	ldub	[%i3], %g2			! tmpOrMask
	ldub	[%i4], %g3			! tmpAndMask
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (tmpAndMask << 8) | tmpOrMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2				! c = 0

	/* The loop below is bottom-tested, so an empty vector must bypass
	 * it; otherwise it would process a vertex that does not exist and
	 * never hit the i == count exit condition.
	 */
	cmp	%l3, 0
	be	4f
	 clr	%l0				! (delay slot, both paths) i = 0

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 */

	/* Each addcc shifts a sign bit into the carry and each subcc turns
	 * a magnitude comparison into a borrow; the addx instructions shift
	 * those carries into %g3 one bit at a time to form the table index.
	 */
1:	ld	[%i0 + 0x0c], %f3		! LSU	Group
	ld	[%i0 + 0x0c], %g5		! LSU	Group
	ld	[%i0 + 0x08], %g4		! LSU	Group
	fdivs	%f4, %f3, %f8			! FGM
	addcc	%g5, %g5, %g5			! IEU1	Group
	addx	%g0, 0x0, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group
	ld	[%i0 + 0x04], %g4		! LSU	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group
	ld	[%i0 + 0x00], %g4		! LSU	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	ldub	[%g1 + %g3], %g3		! LSU	Group
	cmp	%g3, 0				! IEU1	Group, stall
	be	2f				! CTI
	 stb	%g3, [%i2]			! LSU

	/* mask != 0: or the mask into the low byte of g2, and the second
	 * byte of g2 with it (the 0xff keeps the low byte intact), and
	 * emit (0, 0, 0, 1.0).
	 */
	sll	%g3, 8, %g4			! IEU1	Group
	add	%l2, 1, %l2			! IEU0
	st	%g0, [%i5 + 0x00]		! LSU
	or	%g4, 0xff, %g4			! IEU0	Group
	or	%g2, %g3, %g2			! IEU1
	st	%g0, [%i5 + 0x04]		! LSU
	and	%g2, %g4, %g2			! IEU0	Group
	st	%g0, [%i5 + 0x08]		! LSU
	b	3f				! CTI
	 st	%f4, [%i5 + 0x0c]		! LSU	Group

	/* mask == 0: scale x, y, z by f8 = 1.0 / w and store 1.0 / w as
	 * the fourth component.
	 */
2:	ld	[%i0 + 0x00], %f0		! LSU	Group
	ld	[%i0 + 0x04], %f1		! LSU	Group
	ld	[%i0 + 0x08], %f2		! LSU	Group
	fmuls	%f0, %f8, %f0			! FGM
	st	%f0, [%i5 + 0x00]		! LSU	Group
	fmuls	%f1, %f8, %f1			! FGM
	st	%f1, [%i5 + 0x04]		! LSU	Group
	fmuls	%f2, %f8, %f2			! FGM
	st	%f2, [%i5 + 0x08]		! LSU	Group
	st	%f8, [%i5 + 0x0c]		! LSU	Group

3:	add	%i5, 0x10, %i5			! IEU1
	add	%l0, 1, %l0			! IEU0	Group
	add	%i2, 1, %i2			! IEU0	Group
	cmp	%l0, %l3			! IEU1	Group
	bne	1b				! CTI
	 add	%i0, %l1, %i0			! IEU0	Group

	/* Write the masks back.  The annulled delay slot clears the
	 * and-mask only when the branch is taken, i.e. when c < count.
	 * Both counters are unsigned, hence blu rather than bl.
	 */
4:	stb	%g2, [%i3]			! LSU
	srl	%g2, 8, %g3			! IEU0	Group
	cmp	%l2, %l3			! IEU1	Group
	blu,a	1f				! CTI
	 clr	%g3				! IEU0
1:	stb	%g3, [%i4]			! LSU	Group
	ret					! CTI	Group
	 restore %i1, 0x0, %o0			! return proj_vec

/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Same clip-mask computation as _mesa_sparc_cliptest_points4, but without
 * any projection: nothing is written through proj_vec and its header
 * fields are left alone.  Only clipMask[], *orMask and *andMask are
 * written.  Returns proj_vec.
 *
 * NOTE(review): the register-window frame is 64 bytes on both ABIs; confirm
 * that this is sufficient for the window save area when __arch64__ is set.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save	%sp, -64, %sp

	/* Same PC-relative address computation as above; only the table
	 * address is needed here, not the 1.0f constant.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	ldub	[%i3], %g2			! tmpOrMask
	ldub	[%i4], %g3			! tmpAndMask
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (tmpAndMask << 8) | tmpOrMask

	clr	%l2				! c = 0

	/* Bottom-tested loop: bypass it entirely for an empty vector. */
	cmp	%l3, 0
	be	4f
	 clr	%l0				! (delay slot, both paths) i = 0

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 */

1:	ld	[%i0 + 0x0c], %g5		! LSU	Group
	ld	[%i0 + 0x08], %g4		! LSU	Group
	addcc	%g5, %g5, %g5			! IEU1	Group
	addx	%g0, 0x0, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group
	ld	[%i0 + 0x04], %g4		! LSU	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group
	ld	[%i0 + 0x00], %g4		! LSU	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group
	addx	%g3, %g3, %g3			! IEU1	Group
	ldub	[%g1 + %g3], %g3		! LSU	Group
	cmp	%g3, 0				! IEU1	Group, stall
	be	2f				! CTI
	 stb	%g3, [%i2]			! LSU

	/* mask != 0: count it and fold it into both running masks. */
	sll	%g3, 8, %g4			! IEU1	Group
	add	%l2, 1, %l2			! IEU0
	or	%g4, 0xff, %g4			! IEU0	Group
	or	%g2, %g3, %g2			! IEU1
	and	%g2, %g4, %g2			! IEU0	Group

2:	add	%l0, 1, %l0			! IEU0	Group
	add	%i2, 1, %i2			! IEU0	Group
	cmp	%l0, %l3			! IEU1	Group
	bne	1b				! CTI
	 add	%i0, %l1, %i0			! IEU0	Group

	/* Write the masks back; the and-mask becomes 0 when c < count
	 * (unsigned comparison).
	 */
4:	stb	%g2, [%i3]			! LSU
	srl	%g2, 8, %g3			! IEU0	Group
	cmp	%l2, %l3			! IEU1	Group
	blu,a	1f				! CTI
	 clr	%g3				! IEU0
1:	stb	%g3, [%i4]			! LSU	Group
	ret					! CTI	Group
	 restore %i1, 0x0, %o0			! return proj_vec