1 1. store_src_rg coords = src.rg 2 2. init_lane_masks CondMask = LoopMask = RetMask = true 3 3. zero_4_slots_unmasked _0_m = 0 4 4. zero_4_slots_unmasked _1_mm = 0 5 5. zero_2_slots_unmasked $0..1 = 0 6 6. swizzle_4 $0..3 = ($0..3).yxxy 7 7. copy_4_slots_unmasked _3_z = $0..3 8 8. copy_4_constants $0..3 = testMatrix2x2 9 9. copy_4_slots_unmasked _0_m = $0..3 10 10. copy_4_constants $0..3 = testMatrix2x2 11 11. copy_4_slots_unmasked _0_m = $0..3 12 12. zero_4_slots_unmasked $0..3 = 0 13 13. copy_4_slots_unmasked $4..7 = _0_m 14 14. sub_4_floats $0..3 -= $4..7 15 15. copy_4_slots_unmasked _0_m = $0..3 16 16. zero_2_slots_unmasked $0..1 = 0 17 17. swizzle_4 $0..3 = ($0..3).yxxy 18 18. copy_4_slots_unmasked _1_mm = $0..3 19 19. zero_2_slots_unmasked $0..1 = 0 20 20. swizzle_4 $0..3 = ($0..3).yxxy 21 21. copy_4_slots_unmasked _1_mm = $0..3 22 22. store_condition_mask $62 = CondMask 23 23. store_condition_mask $49 = CondMask 24 24. copy_4_slots_unmasked $50..53 = _0_m 25 25. zero_4_slots_unmasked $54..57 = 0 26 26. copy_4_constants $58..61 = testMatrix2x2 27 27. sub_4_floats $54..57 -= $58..61 28 28. cmpeq_4_floats $50..53 = equal($50..53, $54..57) 29 29. bitwise_and_2_ints $50..51 &= $52..53 30 30. bitwise_and_int $50 &= $51 31 31. copy_4_slots_unmasked $51..54 = _1_mm 32 32. copy_4_slots_unmasked $55..58 = _3_z 33 33. cmpeq_4_floats $51..54 = equal($51..54, $55..58) 34 34. bitwise_and_2_ints $51..52 &= $53..54 35 35. bitwise_and_int $51 &= $52 36 36. bitwise_and_int $50 &= $51 37 37. zero_slot_unmasked $63 = 0 38 38. merge_condition_mask CondMask = $49 & $50 39 39. branch_if_no_active_lanes branch_if_no_active_lanes +72 (label 2 at #111) 40 40. zero_4_slots_unmasked m(0..3) = 0 41 41. zero_4_slots_unmasked m(4..7) = 0 42 42. zero_4_slots_unmasked m(8), mm(0..2) = 0 43 43. zero_4_slots_unmasked mm(3..6) = 0 44 44. zero_2_slots_unmasked mm(7..8) = 0 45 45. zero_2_slots_unmasked $64..65 = 0 46 46. shuffle $64..72 = ($64..72)[1 0 0 0 1 0 0 0 1] 47 47. copy_4_slots_unmasked z(0..3) = $64..67 48 48. copy_4_slots_unmasked z(4..7) = $68..71 49 49. copy_slot_unmasked z(8) = $72 50 50. copy_4_constants $64..67 = testMatrix3x3(0..3) 51 51. copy_4_constants $68..71 = testMatrix3x3(4..7) 52 52. copy_constant $72 = testMatrix3x3(8) 53 53. copy_4_slots_masked m(0..3) = Mask($64..67) 54 54. copy_4_slots_masked m(4..7) = Mask($68..71) 55 55. copy_slot_masked m(8) = Mask($72) 56 56. copy_4_constants $64..67 = testMatrix3x3(0..3) 57 57. copy_4_constants $68..71 = testMatrix3x3(4..7) 58 58. copy_constant $72 = testMatrix3x3(8) 59 59. copy_4_slots_masked m(0..3) = Mask($64..67) 60 60. copy_4_slots_masked m(4..7) = Mask($68..71) 61 61. copy_slot_masked m(8) = Mask($72) 62 62. zero_4_slots_unmasked $64..67 = 0 63 63. zero_4_slots_unmasked $68..71 = 0 64 64. zero_slot_unmasked $72 = 0 65 65. copy_4_slots_unmasked $73..76 = m(0..3) 66 66. copy_4_slots_unmasked $77..80 = m(4..7) 67 67. copy_slot_unmasked $81 = m(8) 68 68. sub_n_floats $64..72 -= $73..81 69 69. copy_4_slots_masked m(0..3) = Mask($64..67) 70 70. copy_4_slots_masked m(4..7) = Mask($68..71) 71 71. copy_slot_masked m(8) = Mask($72) 72 72. zero_2_slots_unmasked $64..65 = 0 73 73. shuffle $64..72 = ($64..72)[1 0 0 0 1 0 0 0 1] 74 74. copy_4_slots_masked mm(0..3) = Mask($64..67) 75 75. copy_4_slots_masked mm(4..7) = Mask($68..71) 76 76. copy_slot_masked mm(8) = Mask($72) 77 77. zero_2_slots_unmasked $64..65 = 0 78 78. shuffle $64..72 = ($64..72)[1 0 0 0 1 0 0 0 1] 79 79. copy_4_slots_masked mm(0..3) = Mask($64..67) 80 80. copy_4_slots_masked mm(4..7) = Mask($68..71) 81 81. copy_slot_masked mm(8) = Mask($72) 82 82. copy_4_slots_unmasked $64..67 = m(0..3) 83 83. copy_4_slots_unmasked $68..71 = m(4..7) 84 84. copy_slot_unmasked $72 = m(8) 85 85. zero_4_slots_unmasked $73..76 = 0 86 86. zero_4_slots_unmasked $77..80 = 0 87 87. zero_slot_unmasked $81 = 0 88 88. copy_4_constants $82..85 = testMatrix3x3(0..3) 89 89. copy_4_constants $86..89 = testMatrix3x3(4..7) 90 90. copy_constant $90 = testMatrix3x3(8) 91 91. sub_n_floats $73..81 -= $82..90 92 92. cmpeq_n_floats $64..72 = equal($64..72, $73..81) 93 93. bitwise_and_4_ints $65..68 &= $69..72 94 94. bitwise_and_2_ints $65..66 &= $67..68 95 95. bitwise_and_int $65 &= $66 96 96. bitwise_and_int $64 &= $65 97 97. copy_4_slots_unmasked $65..68 = mm(0..3) 98 98. copy_4_slots_unmasked $69..72 = mm(4..7) 99 99. copy_4_slots_unmasked $73..76 = mm(8), z(0..2) 100 100. copy_4_slots_unmasked $77..80 = z(3..6) 101 101. copy_2_slots_unmasked $81..82 = z(7..8) 102 102. cmpeq_n_floats $65..73 = equal($65..73, $74..82) 103 103. bitwise_and_4_ints $66..69 &= $70..73 104 104. bitwise_and_2_ints $66..67 &= $68..69 105 105. bitwise_and_int $66 &= $67 106 106. bitwise_and_int $65 &= $66 107 107. bitwise_and_int $64 &= $65 108 108. copy_slot_masked [test_mat3_mat3].result = Mask($64) 109 109. label label 0x00000003 110 110. copy_slot_masked $63 = Mask($64) 111 111. label label 0x00000002 112 112. load_condition_mask CondMask = $49 113 113. zero_slot_unmasked $0 = 0 114 114. merge_condition_mask CondMask = $62 & $63 115 115. branch_if_no_active_lanes branch_if_no_active_lanes +101 (label 1 at #216) 116 116. copy_4_constants $1..4 = testInputs 117 117. copy_4_constants $5..8 = testInputs 118 118. copy_4_constants $9..12 = testInputs 119 119. copy_4_constants $13..16 = testInputs 120 120. copy_4_slots_unmasked testMatrix4x4(0..3) = $1..4 121 121. copy_4_slots_unmasked testMatrix4x4(4..7) = $5..8 122 122. copy_4_slots_unmasked testMatrix4x4(8..11) = $9..12 123 123. copy_4_slots_unmasked testMatrix4x4(12..15) = $13..16 124 124. zero_4_slots_unmasked m₁(0..3) = 0 125 125. zero_4_slots_unmasked m₁(4..7) = 0 126 126. zero_4_slots_unmasked m₁(8..11) = 0 127 127. zero_4_slots_unmasked m₁(12..15) = 0 128 128. zero_4_slots_unmasked mm₁(0..3) = 0 129 129. zero_4_slots_unmasked mm₁(4..7) = 0 130 130. zero_4_slots_unmasked mm₁(8..11) = 0 131 131. zero_4_slots_unmasked mm₁(12..15) = 0 132 132. zero_2_slots_unmasked $1..2 = 0 133 133. shuffle $1..16 = ($1..16)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1] 134 134. copy_4_slots_unmasked z₁(0..3) = $1..4 135 135. copy_4_slots_unmasked z₁(4..7) = $5..8 136 136. copy_4_slots_unmasked z₁(8..11) = $9..12 137 137. copy_4_slots_unmasked z₁(12..15) = $13..16 138 138. copy_4_slots_unmasked $1..4 = testMatrix4x4(0..3) 139 139. copy_4_slots_unmasked $5..8 = testMatrix4x4(4..7) 140 140. copy_4_slots_unmasked $9..12 = testMatrix4x4(8..11) 141 141. copy_4_slots_unmasked $13..16 = testMatrix4x4(12..15) 142 142. copy_4_slots_masked m₁(0..3) = Mask($1..4) 143 143. copy_4_slots_masked m₁(4..7) = Mask($5..8) 144 144. copy_4_slots_masked m₁(8..11) = Mask($9..12) 145 145. copy_4_slots_masked m₁(12..15) = Mask($13..16) 146 146. copy_4_slots_unmasked $1..4 = testMatrix4x4(0..3) 147 147. copy_4_slots_unmasked $5..8 = testMatrix4x4(4..7) 148 148. copy_4_slots_unmasked $9..12 = testMatrix4x4(8..11) 149 149. copy_4_slots_unmasked $13..16 = testMatrix4x4(12..15) 150 150. copy_4_slots_masked m₁(0..3) = Mask($1..4) 151 151. copy_4_slots_masked m₁(4..7) = Mask($5..8) 152 152. copy_4_slots_masked m₁(8..11) = Mask($9..12) 153 153. copy_4_slots_masked m₁(12..15) = Mask($13..16) 154 154. zero_4_slots_unmasked $1..4 = 0 155 155. zero_4_slots_unmasked $5..8 = 0 156 156. zero_4_slots_unmasked $9..12 = 0 157 157. zero_4_slots_unmasked $13..16 = 0 158 158. copy_4_slots_unmasked $17..20 = m₁(0..3) 159 159. copy_4_slots_unmasked $21..24 = m₁(4..7) 160 160. copy_4_slots_unmasked $25..28 = m₁(8..11) 161 161. copy_4_slots_unmasked $29..32 = m₁(12..15) 162 162. sub_n_floats $1..16 -= $17..32 163 163. copy_4_slots_masked m₁(0..3) = Mask($1..4) 164 164. copy_4_slots_masked m₁(4..7) = Mask($5..8) 165 165. copy_4_slots_masked m₁(8..11) = Mask($9..12) 166 166. copy_4_slots_masked m₁(12..15) = Mask($13..16) 167 167. zero_2_slots_unmasked $1..2 = 0 168 168. shuffle $1..16 = ($1..16)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1] 169 169. copy_4_slots_masked mm₁(0..3) = Mask($1..4) 170 170. copy_4_slots_masked mm₁(4..7) = Mask($5..8) 171 171. copy_4_slots_masked mm₁(8..11) = Mask($9..12) 172 172. copy_4_slots_masked mm₁(12..15) = Mask($13..16) 173 173. zero_2_slots_unmasked $1..2 = 0 174 174. shuffle $1..16 = ($1..16)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1] 175 175. copy_4_slots_masked mm₁(0..3) = Mask($1..4) 176 176. copy_4_slots_masked mm₁(4..7) = Mask($5..8) 177 177. copy_4_slots_masked mm₁(8..11) = Mask($9..12) 178 178. copy_4_slots_masked mm₁(12..15) = Mask($13..16) 179 179. copy_4_slots_unmasked $1..4 = m₁(0..3) 180 180. copy_4_slots_unmasked $5..8 = m₁(4..7) 181 181. copy_4_slots_unmasked $9..12 = m₁(8..11) 182 182. copy_4_slots_unmasked $13..16 = m₁(12..15) 183 183. zero_4_slots_unmasked $17..20 = 0 184 184. zero_4_slots_unmasked $21..24 = 0 185 185. zero_4_slots_unmasked $25..28 = 0 186 186. zero_4_slots_unmasked $29..32 = 0 187 187. copy_4_slots_unmasked $33..36 = testMatrix4x4(0..3) 188 188. copy_4_slots_unmasked $37..40 = testMatrix4x4(4..7) 189 189. copy_4_slots_unmasked $41..44 = testMatrix4x4(8..11) 190 190. copy_4_slots_unmasked $45..48 = testMatrix4x4(12..15) 191 191. sub_n_floats $17..32 -= $33..48 192 192. cmpeq_n_floats $1..16 = equal($1..16, $17..32) 193 193. bitwise_and_4_ints $9..12 &= $13..16 194 194. bitwise_and_4_ints $5..8 &= $9..12 195 195. bitwise_and_4_ints $1..4 &= $5..8 196 196. bitwise_and_2_ints $1..2 &= $3..4 197 197. bitwise_and_int $1 &= $2 198 198. copy_4_slots_unmasked $2..5 = mm₁(0..3) 199 199. copy_4_slots_unmasked $6..9 = mm₁(4..7) 200 200. copy_4_slots_unmasked $10..13 = mm₁(8..11) 201 201. copy_4_slots_unmasked $14..17 = mm₁(12..15) 202 202. copy_4_slots_unmasked $18..21 = z₁(0..3) 203 203. copy_4_slots_unmasked $22..25 = z₁(4..7) 204 204. copy_4_slots_unmasked $26..29 = z₁(8..11) 205 205. copy_4_slots_unmasked $30..33 = z₁(12..15) 206 206. cmpeq_n_floats $2..17 = equal($2..17, $18..33) 207 207. bitwise_and_4_ints $10..13 &= $14..17 208 208. bitwise_and_4_ints $6..9 &= $10..13 209 209. bitwise_and_4_ints $2..5 &= $6..9 210 210. bitwise_and_2_ints $2..3 &= $4..5 211 211. bitwise_and_int $2 &= $3 212 212. bitwise_and_int $1 &= $2 213 213. copy_slot_masked [test_mat4_mat4].result = Mask($1) 214 214. label label 0x00000004 215 215. copy_slot_masked $0 = Mask($1) 216 216. label label 0x00000001 217 217. load_condition_mask CondMask = $62 218 218. swizzle_4 $0..3 = ($0..3).xxxx 219 219. copy_4_constants $4..7 = colorRed 220 220. copy_4_constants $8..11 = colorGreen 221 221. mix_4_ints $0..3 = mix($4..7, $8..11, $0..3) 222 222. copy_4_slots_unmasked [main].result = $0..3 223 223. load_src src.rgba = [main].result 224