/*
 * Clip testing in SPARC assembly
 *
 * Syntax: GNU as for SPARC, run through the C preprocessor.
 * Two entry points are exported:
 *
 *   _mesa_sparc_cliptest_points4     - clip test + perspective divide
 *   _mesa_sparc_cliptest_points4_np  - clip test only ("no projection")
 */

#if __arch64__
#define LDPTR		ldx
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
/* The 64-bit ABI keeps a 16 x 8 = 128 byte register window save area
 * at the bottom of every frame; 176 bytes is the minimum compliant
 * frame.  A smaller frame would let a spill of this window overlap
 * the save area of the caller.
 */
#define CLIP_FRAME_SIZE	176
#else
#define LDPTR		ld
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
/* 16 x 4 = 64 bytes covers the register window save area.  The only
 * call made below is to __pc_tramp, which does not touch the stack.
 */
#define CLIP_FRAME_SIZE	64
#endif

#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text
	.align		64

	/* Constant data lives in .text, directly in front of the code,
	 * so that both functions can locate it PC-relative through
	 * __pc_tramp.  clip_table must stay exactly 4 bytes after
	 * one_dot_zero: the code derives its address with "add 0x4".
	 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The table maps a 7-bit index to a clip mask byte.  The index
	 * is assembled one carry bit at a time, most significant bit
	 * first:
	 *
	 *   bit 6: sign bit of w
	 *   bit 5: sign bit of z
	 *   bit 4: |w| < |z|
	 *   bit 3: sign bit of y
	 *   bit 2: |w| < |y|
	 *   bit 1: sign bit of x
	 *   bit 0: |w| < |x|
	 *
	 * The magnitude tests are unsigned integer compares of the raw
	 * 32-bit patterns after the sign bit has been shifted out.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

/* Both entry points take the same argument list:
 *
 *   GLvector4f *clip_vec, GLvector4f *proj_vec,
 *   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
 *   GLboolean viewport_z_enable
 *
 * After "save" these are %i0 .. %i5.  viewport_z_enable (%i5) is
 * never read by either function.
 */

	.align		64

	/* Leaf helper: "call __pc_tramp" leaves the address of the call
	 * instruction in %o7, which the caller turns into the address
	 * of one_dot_zero in the call delay slot.
	 */
__pc_tramp:
	retl
	 nop

/*
 * _mesa_sparc_cliptest_points4
 *
 * For each of clip_vec->count vertices (x, y, z, w):
 *   - look up the clip mask byte and store it in clipMask[i];
 *   - if the mask is non-zero, store (0, 0, 0, 1.0) in the matching
 *     proj_vec slot, OR the mask into the running or-mask and AND it
 *     into the running and-mask;
 *   - otherwise store (x/w, y/w, z/w, 1/w), computed as
 *     multiplications by 1.0/w.
 *
 * On exit *orMask receives the accumulated or-mask.  *andMask
 * receives 0 when fewer than count vertices had a non-zero mask,
 * else the accumulated and-mask.  proj_vec gets VEC_SIZE_4 ORed into
 * its flags, its size field set to 3 and its count set to
 * clip_vec->count.  Returns proj_vec.
 *
 * NOTE(review): the flags say VEC_SIZE_4 while the size field is
 * written as 3; that is what the original code did and it is kept
 * as is - confirm which one callers rely on.
 *
 * A count of zero now skips the loop entirely; the loop itself is
 * do-while shaped and would otherwise run off the end of the data.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save		%sp, -CLIP_FRAME_SIZE, %sp
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1	! g1 = &one_dot_zero
	ld		[%g1 + 0x0], %f4			! f4 = 1.0f
	add		%g1, 0x4, %g1				! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%l3, [%i1 + V4F_COUNT]
	clr		%l2
	cmp		%l3, 0				! nothing to test?
	be		4f				! then only write the masks back
	 clr		%l0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 */

	/* The addcc/addx/subcc chain below builds the clip_table index
	 * in %g3; its exact order defines the bit layout of the index.
	 * The reciprocal of w is started early so that it overlaps the
	 * integer work.
	 */
1:	ld		[%i0 + 0x0c], %f3		! LSU	Group
	ld		[%i0 + 0x0c], %g5		! LSU	Group
	ld		[%i0 + 0x08], %g4		! LSU	Group
	fdivs		%f4, %f3, %f8			! FGM
	addcc		%g5, %g5, %g5			! IEU1	Group
	addx		%g0, 0x0, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x04], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x00], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	ldub		[%g1 + %g3], %g3		! LSU	Group
	cmp		%g3, 0				! IEU1	Group, stall
	be		2f				! CTI
	 stb		%g3, [%i2]			! LSU

	/* Non-zero mask: count it, fold it into both running masks
	 * and emit (0, 0, 0, 1.0).
	 */
	sll		%g3, 8, %g4			! IEU1	Group
	add		%l2, 1, %l2			! IEU0
	st		%g0, [%i5 + 0x00]		! LSU
	or		%g4, 0xff, %g4			! IEU0	Group
	or		%g2, %g3, %g2			! IEU1
	st		%g0, [%i5 + 0x04]		! LSU
	and		%g2, %g4, %g2			! IEU0	Group
	st		%g0, [%i5 + 0x08]		! LSU
	b		3f				! CTI
	 st		%f4, [%i5 + 0x0c]		! LSU	Group

	/* Zero mask: scale x, y, z by 1.0/w and store 1.0/w as w. */
2:	ld		[%i0 + 0x00], %f0		! LSU	Group
	ld		[%i0 + 0x04], %f1		! LSU	Group
	ld		[%i0 + 0x08], %f2		! LSU	Group
	fmuls		%f0, %f8, %f0			! FGM
	st		%f0, [%i5 + 0x00]		! LSU	Group
	fmuls		%f1, %f8, %f1			! FGM
	st		%f1, [%i5 + 0x04]		! LSU	Group
	fmuls		%f2, %f8, %f2			! FGM
	st		%f2, [%i5 + 0x08]		! LSU	Group
	st		%f8, [%i5 + 0x0c]		! LSU	Group

3:	add		%i5, 0x10, %i5			! IEU1
	add		%l0, 1, %l0			! IEU0	Group
	add		%i2, 1, %i2			! IEU0	Group
	cmp		%l0, %l3			! IEU1	Group
	bne		1b				! CTI
	 add		%i0, %l1, %i0			! IEU0	Group

	/* Write back the masks.  The annulled branch clears %g3 only
	 * when c < count.
	 */
4:	stb		%g2, [%i3]			! LSU
	srl		%g2, 8, %g3			! IEU0	Group
	cmp		%l2, %l3			! IEU1	Group
	bl,a		1f				! CTI
	 clr		%g3				! IEU0
1:	stb		%g3, [%i4]			! LSU	Group
	ret						! CTI	Group
	 restore	%i1, 0x0, %o0

/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Same clip test as above without the perspective divide: only
 * clipMask[], *orMask and *andMask are written; proj_vec is not
 * dereferenced.
 *
 * NOTE(review): the value returned is %i1, i.e. the proj_vec
 * argument - confirm that no caller expects clip_vec here.
 *
 * A count of zero now skips the loop entirely.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save		%sp, -CLIP_FRAME_SIZE, %sp

	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1	! g1 = &one_dot_zero
	add		%g1, 0x4, %g1				! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2

	clr		%l2
	cmp		%l3, 0				! nothing to test?
	be		4f				! then only write the masks back
	 clr		%l0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

	/* Same index construction as in _mesa_sparc_cliptest_points4. */
1:	ld		[%i0 + 0x0c], %g5		! LSU	Group
	ld		[%i0 + 0x08], %g4		! LSU	Group
	addcc		%g5, %g5, %g5			! IEU1	Group
	addx		%g0, 0x0, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x04], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x00], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	ldub		[%g1 + %g3], %g3		! LSU	Group
	cmp		%g3, 0				! IEU1	Group, stall
	be		2f				! CTI
	 stb		%g3, [%i2]			! LSU

	/* Non-zero mask: count it and fold it into both running masks. */
	sll		%g3, 8, %g4			! IEU1	Group
	add		%l2, 1, %l2			! IEU0
	or		%g4, 0xff, %g4			! IEU0	Group
	or		%g2, %g3, %g2			! IEU1
	and		%g2, %g4, %g2			! IEU0	Group

2:	add		%l0, 1, %l0			! IEU0	Group
	add		%i2, 1, %i2			! IEU0	Group
	cmp		%l0, %l3			! IEU1	Group
	bne		1b				! CTI
	 add		%i0, %l1, %i0			! IEU0	Group

	/* Write back the masks.  The annulled branch clears %g3 only
	 * when c < count.
	 */
4:	stb		%g2, [%i3]			! LSU
	srl		%g2, 8, %g3			! IEU0	Group
	cmp		%l2, %l3			! IEU1	Group
	bl,a		1f				! CTI
	 clr		%g3				! IEU0
1:	stb		%g3, [%i4]			! LSU	Group
	ret						! CTI	Group
	 restore	%i1, 0x0, %o0