1 1. store_src_rg coords = src.rg 2 2. init_lane_masks CondMask = LoopMask = RetMask = true 3 3. copy_constant $0 = colorGreen(0) 4 4. copy_constant $1 = colorGreen(2) 5 5. div_float $0 /= $1 6 6. copy_slot_unmasked NAN1 = $0 7 7. copy_constant $0 = colorGreen(2) 8 8. copy_constant $1 = colorGreen(0) 9 9. div_float $0 /= $1 10 10. copy_slot_unmasked NAN2 = $0 11 11. copy_constant $0 = colorGreen(0) 12 12. copy_constant $1 = colorGreen(2) 13 13. mul_float $0 *= $1 14 14. copy_slot_unmasked ZP = $0 15 15. zero_slot_unmasked $0 = 0 16 16. copy_constant $1 = colorGreen(0) 17 17. sub_float $0 -= $1 18 18. copy_constant $1 = colorGreen(2) 19 19. mul_float $0 *= $1 20 20. copy_slot_unmasked ZM = $0 21 21. copy_constant $0 = colorGreen(1) 22 22. copy_constant $1 = 0x42280000 (42.0) 23 23. mul_float $0 *= $1 24 24. copy_slot_unmasked F42 = $0 25 25. copy_constant $0 = colorGreen(1) 26 26. copy_constant $1 = 0x422C0000 (43.0) 27 27. mul_float $0 *= $1 28 28. copy_slot_unmasked F43 = $0 29 29. copy_constant $0 = colorGreen(1) 30 30. copy_constant $1 = 0x42300000 (44.0) 31 31. mul_float $0 *= $1 32 32. copy_slot_unmasked F44 = $0 33 33. copy_constant $0 = colorGreen(1) 34 34. copy_constant $1 = 0x42340000 (45.0) 35 35. mul_float $0 *= $1 36 36. copy_slot_unmasked F45 = $0 37 37. copy_constant EQ = 0xFFFFFFFF 38 38. zero_slot_unmasked NE = 0 39 39. copy_constant $0 = colorGreen(0) 40 40. copy_constant $1 = 0x3F800000 (1.0) 41 41. add_float $0 += $1 42 42. copy_slot_unmasked _0_one = $0 43 43. copy_slot_unmasked _1_a(0) = F42 44 44. copy_slot_unmasked _1_a(1) = ZM 45 45. copy_slot_unmasked _1_a(2) = ZP 46 46. copy_slot_unmasked _1_a(3) = F43 47 47. copy_slot_unmasked $0 = F42 48 48. copy_slot_unmasked $1 = _0_one 49 49. mul_float $0 *= $1 50 50. copy_slot_unmasked $1 = ZM 51 51. copy_slot_unmasked $2 = _0_one 52 52. mul_float $1 *= $2 53 53. copy_slot_unmasked $2 = ZP 54 54. copy_slot_unmasked $3 = _0_one 55 55. mul_float $2 *= $3 56 56. copy_slot_unmasked $3 = F43 57 57. copy_slot_unmasked $4 = _0_one 58 58. mul_float $3 *= $4 59 59. copy_4_slots_unmasked _2_b = $0..3 60 60. store_condition_mask $22 = CondMask 61 61. store_condition_mask $44 = CondMask 62 62. store_condition_mask $55 = CondMask 63 63. store_condition_mask $33 = CondMask 64 64. store_condition_mask $79 = CondMask 65 65. store_condition_mask $66 = CondMask 66 66. store_condition_mask $12 = CondMask 67 67. store_condition_mask $77 = CondMask 68 68. copy_slot_unmasked $78 = EQ 69 69. copy_4_slots_unmasked $13..16 = _1_a 70 70. copy_4_slots_unmasked $17..20 = _2_b 71 71. cmpne_4_floats $13..16 = notEqual($13..16, $17..20) 72 72. bitwise_or_2_ints $13..14 |= $15..16 73 73. bitwise_or_int $13 |= $14 74 74. merge_condition_mask CondMask = $77 & $78 75 75. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 8 at #82) 76 76. copy_4_slots_unmasked $14..17 = _1_a 77 77. copy_4_slots_unmasked $18..21 = _2_b 78 78. cmpeq_4_floats $14..17 = equal($14..17, $18..21) 79 79. bitwise_and_2_ints $14..15 &= $16..17 80 80. bitwise_and_int $14 &= $15 81 81. copy_slot_masked $13 = Mask($14) 82 82. label label 0x00000008 83 83. load_condition_mask CondMask = $77 84 84. zero_slot_unmasked $67 = 0 85 85. merge_condition_mask CondMask = $12 & $13 86 86. branch_if_no_active_lanes branch_if_no_active_lanes +45 (label 7 at #131) 87 87. copy_slot_unmasked eq = NE 88 88. copy_slot_unmasked f1 = F42 89 89. copy_slot_unmasked f2 = ZM 90 90. copy_slot_unmasked f3 = ZP 91 91. copy_slot_unmasked f4 = F43 92 92. copy_constant $68 = colorGreen(0) 93 93. copy_constant $69 = 0x3F800000 (1.0) 94 94. add_float $68 += $69 95 95. copy_slot_unmasked one = $68 96 96. copy_4_slots_unmasked a = f1, f2, f3, f4 97 97. copy_slot_unmasked $68 = f1 98 98. copy_slot_unmasked $69 = one 99 99. mul_float $68 *= $69 100 100. copy_slot_unmasked $69 = f2 101 101. copy_slot_unmasked $70 = one 102 102. mul_float $69 *= $70 103 103. copy_slot_unmasked $70 = f3 104 104. copy_slot_unmasked $71 = one 105 105. mul_float $70 *= $71 106 106. copy_slot_unmasked $71 = f4 107 107. copy_slot_unmasked $72 = one 108 108. mul_float $71 *= $72 109 109. copy_4_slots_unmasked b = $68..71 110 110. store_condition_mask $77 = CondMask 111 111. copy_slot_unmasked $78 = eq 112 112. copy_4_slots_unmasked $68..71 = a 113 113. copy_4_slots_unmasked $72..75 = b 114 114. cmpne_4_floats $68..71 = notEqual($68..71, $72..75) 115 115. bitwise_or_2_ints $68..69 |= $70..71 116 116. bitwise_or_int $68 |= $69 117 117. merge_condition_mask CondMask = $77 & $78 118 118. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 10 at #125) 119 119. copy_4_slots_unmasked $69..72 = a 120 120. copy_4_slots_unmasked $73..76 = b 121 121. cmpeq_4_floats $69..72 = equal($69..72, $73..76) 122 122. bitwise_and_2_ints $69..70 &= $71..72 123 123. bitwise_and_int $69 &= $70 124 124. copy_slot_masked $68 = Mask($69) 125 125. label label 0x0000000A 126 126. load_condition_mask CondMask = $77 127 127. copy_slot_masked [test_same_vectors].result = Mask($68) 128 128. label label 0x00000009 129 129. bitwise_not_int $68 = ~$68 130 130. copy_slot_masked $67 = Mask($68) 131 131. label label 0x00000007 132 132. load_condition_mask CondMask = $12 133 133. zero_slot_unmasked $80 = 0 134 134. merge_condition_mask CondMask = $66 & $67 135 135. branch_if_no_active_lanes branch_if_no_active_lanes +43 (label 6 at #178) 136 136. copy_slot_unmasked eq = NE 137 137. copy_slot_unmasked f1 = F42 138 138. copy_2_slots_unmasked f2, f3 = NAN1, NAN2 139 139. copy_slot_unmasked f4 = F43 140 140. copy_constant $81 = colorGreen(0) 141 141. copy_constant $82 = 0x3F800000 (1.0) 142 142. add_float $81 += $82 143 143. copy_slot_unmasked one = $81 144 144. copy_4_slots_unmasked a = f1, f2, f3, f4 145 145. copy_slot_unmasked $81 = f1 146 146. copy_slot_unmasked $82 = one 147 147. mul_float $81 *= $82 148 148. copy_slot_unmasked $82 = f2 149 149. copy_slot_unmasked $83 = one 150 150. mul_float $82 *= $83 151 151. copy_slot_unmasked $83 = f3 152 152. copy_slot_unmasked $84 = one 153 153. mul_float $83 *= $84 154 154. copy_slot_unmasked $84 = f4 155 155. copy_slot_unmasked $85 = one 156 156. mul_float $84 *= $85 157 157. copy_4_slots_unmasked b = $81..84 158 158. store_condition_mask $12 = CondMask 159 159. copy_slot_unmasked $13 = eq 160 160. copy_4_slots_unmasked $81..84 = a 161 161. copy_4_slots_unmasked $85..88 = b 162 162. cmpne_4_floats $81..84 = notEqual($81..84, $85..88) 163 163. bitwise_or_2_ints $81..82 |= $83..84 164 164. bitwise_or_int $81 |= $82 165 165. merge_condition_mask CondMask = $12 & $13 166 166. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 12 at #173) 167 167. copy_4_slots_unmasked $82..85 = a 168 168. copy_4_slots_unmasked $86..89 = b 169 169. cmpeq_4_floats $82..85 = equal($82..85, $86..89) 170 170. bitwise_and_2_ints $82..83 &= $84..85 171 171. bitwise_and_int $82 &= $83 172 172. copy_slot_masked $81 = Mask($82) 173 173. label label 0x0000000C 174 174. load_condition_mask CondMask = $12 175 175. copy_slot_masked [test_same_vectors].result = Mask($81) 176 176. label label 0x0000000B 177 177. copy_slot_masked $80 = Mask($81) 178 178. label label 0x00000006 179 179. load_condition_mask CondMask = $66 180 180. zero_slot_unmasked $34 = 0 181 181. merge_condition_mask CondMask = $79 & $80 182 182. branch_if_no_active_lanes branch_if_no_active_lanes +44 (label 5 at #226) 183 183. copy_slot_unmasked eq = EQ 184 184. copy_slot_unmasked f1 = F42 185 185. copy_2_slots_unmasked f2, f3 = NAN1, NAN2 186 186. copy_slot_unmasked f4 = F43 187 187. copy_constant $35 = colorGreen(0) 188 188. copy_constant $36 = 0x3F800000 (1.0) 189 189. add_float $35 += $36 190 190. copy_slot_unmasked one = $35 191 191. copy_4_slots_unmasked a = f1, f2, f3, f4 192 192. copy_slot_unmasked $35 = f1 193 193. copy_slot_unmasked $36 = one 194 194. mul_float $35 *= $36 195 195. copy_slot_unmasked $36 = f2 196 196. copy_slot_unmasked $37 = one 197 197. mul_float $36 *= $37 198 198. copy_slot_unmasked $37 = f3 199 199. copy_slot_unmasked $38 = one 200 200. mul_float $37 *= $38 201 201. copy_slot_unmasked $38 = f4 202 202. copy_slot_unmasked $39 = one 203 203. mul_float $38 *= $39 204 204. copy_4_slots_unmasked b = $35..38 205 205. store_condition_mask $66 = CondMask 206 206. copy_slot_unmasked $67 = eq 207 207. copy_4_slots_unmasked $35..38 = a 208 208. copy_4_slots_unmasked $39..42 = b 209 209. cmpne_4_floats $35..38 = notEqual($35..38, $39..42) 210 210. bitwise_or_2_ints $35..36 |= $37..38 211 211. bitwise_or_int $35 |= $36 212 212. merge_condition_mask CondMask = $66 & $67 213 213. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 14 at #220) 214 214. copy_4_slots_unmasked $36..39 = a 215 215. copy_4_slots_unmasked $40..43 = b 216 216. cmpeq_4_floats $36..39 = equal($36..39, $40..43) 217 217. bitwise_and_2_ints $36..37 &= $38..39 218 218. bitwise_and_int $36 &= $37 219 219. copy_slot_masked $35 = Mask($36) 220 220. label label 0x0000000E 221 221. load_condition_mask CondMask = $66 222 222. copy_slot_masked [test_same_vectors].result = Mask($35) 223 223. label label 0x0000000D 224 224. bitwise_not_int $35 = ~$35 225 225. copy_slot_masked $34 = Mask($35) 226 226. label label 0x00000005 227 227. load_condition_mask CondMask = $79 228 228. zero_slot_unmasked $56 = 0 229 229. merge_condition_mask CondMask = $33 & $34 230 230. branch_if_no_active_lanes branch_if_no_active_lanes +41 (label 4 at #271) 231 231. copy_slot_unmasked eq₁ = NE 232 232. copy_4_slots_unmasked f1₁, f2₁, f3₁, f4₁ = F42, F43, F44, F45 233 233. copy_constant $57 = colorGreen(0) 234 234. copy_constant $58 = 0x40000000 (2.0) 235 235. add_float $57 += $58 236 236. copy_slot_unmasked two = $57 237 237. copy_4_slots_unmasked a₁ = f1₁, f2₁, f3₁, f4₁ 238 238. copy_slot_unmasked $57 = f1₁ 239 239. copy_slot_unmasked $58 = two 240 240. mul_float $57 *= $58 241 241. copy_slot_unmasked $58 = f2₁ 242 242. copy_slot_unmasked $59 = two 243 243. mul_float $58 *= $59 244 244. copy_slot_unmasked $59 = f3₁ 245 245. copy_slot_unmasked $60 = two 246 246. mul_float $59 *= $60 247 247. copy_slot_unmasked $60 = f4₁ 248 248. copy_slot_unmasked $61 = two 249 249. mul_float $60 *= $61 250 250. copy_4_slots_unmasked b₁ = $57..60 251 251. store_condition_mask $79 = CondMask 252 252. copy_slot_unmasked $80 = eq₁ 253 253. copy_4_slots_unmasked $57..60 = a₁ 254 254. copy_4_slots_unmasked $61..64 = b₁ 255 255. cmpne_4_floats $57..60 = notEqual($57..60, $61..64) 256 256. bitwise_or_2_ints $57..58 |= $59..60 257 257. bitwise_or_int $57 |= $58 258 258. merge_condition_mask CondMask = $79 & $80 259 259. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 16 at #266) 260 260. copy_4_slots_unmasked $58..61 = a₁ 261 261. copy_4_slots_unmasked $62..65 = b₁ 262 262. cmpeq_4_floats $58..61 = equal($58..61, $62..65) 263 263. bitwise_and_2_ints $58..59 &= $60..61 264 264. bitwise_and_int $58 &= $59 265 265. copy_slot_masked $57 = Mask($58) 266 266. label label 0x00000010 267 267. load_condition_mask CondMask = $79 268 268. copy_slot_masked [test_diff_vectors].result = Mask($57) 269 269. label label 0x0000000F 270 270. copy_slot_masked $56 = Mask($57) 271 271. label label 0x00000004 272 272. load_condition_mask CondMask = $33 273 273. zero_slot_unmasked $45 = 0 274 274. merge_condition_mask CondMask = $55 & $56 275 275. branch_if_no_active_lanes branch_if_no_active_lanes +42 (label 3 at #317) 276 276. copy_slot_unmasked eq₁ = EQ 277 277. copy_4_slots_unmasked f1₁, f2₁, f3₁, f4₁ = F42, F43, F44, F45 278 278. copy_constant $46 = colorGreen(0) 279 279. copy_constant $47 = 0x40000000 (2.0) 280 280. add_float $46 += $47 281 281. copy_slot_unmasked two = $46 282 282. copy_4_slots_unmasked a₁ = f1₁, f2₁, f3₁, f4₁ 283 283. copy_slot_unmasked $46 = f1₁ 284 284. copy_slot_unmasked $47 = two 285 285. mul_float $46 *= $47 286 286. copy_slot_unmasked $47 = f2₁ 287 287. copy_slot_unmasked $48 = two 288 288. mul_float $47 *= $48 289 289. copy_slot_unmasked $48 = f3₁ 290 290. copy_slot_unmasked $49 = two 291 291. mul_float $48 *= $49 292 292. copy_slot_unmasked $49 = f4₁ 293 293. copy_slot_unmasked $50 = two 294 294. mul_float $49 *= $50 295 295. copy_4_slots_unmasked b₁ = $46..49 296 296. store_condition_mask $33 = CondMask 297 297. copy_slot_unmasked $34 = eq₁ 298 298. copy_4_slots_unmasked $46..49 = a₁ 299 299. copy_4_slots_unmasked $50..53 = b₁ 300 300. cmpne_4_floats $46..49 = notEqual($46..49, $50..53) 301 301. bitwise_or_2_ints $46..47 |= $48..49 302 302. bitwise_or_int $46 |= $47 303 303. merge_condition_mask CondMask = $33 & $34 304 304. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 18 at #311) 305 305. copy_4_slots_unmasked $47..50 = a₁ 306 306. copy_4_slots_unmasked $51..54 = b₁ 307 307. cmpeq_4_floats $47..50 = equal($47..50, $51..54) 308 308. bitwise_and_2_ints $47..48 &= $49..50 309 309. bitwise_and_int $47 &= $48 310 310. copy_slot_masked $46 = Mask($47) 311 311. label label 0x00000012 312 312. load_condition_mask CondMask = $33 313 313. copy_slot_masked [test_diff_vectors].result = Mask($46) 314 314. label label 0x00000011 315 315. bitwise_not_int $46 = ~$46 316 316. copy_slot_masked $45 = Mask($46) 317 317. label label 0x00000003 318 318. load_condition_mask CondMask = $55 319 319. zero_slot_unmasked $23 = 0 320 320. merge_condition_mask CondMask = $44 & $45 321 321. branch_if_no_active_lanes branch_if_no_active_lanes +44 (label 2 at #365) 322 322. copy_slot_unmasked eq₁ = NE 323 323. copy_slot_unmasked f1₁ = NAN1 324 324. copy_slot_unmasked f2₁ = ZM 325 325. copy_slot_unmasked f3₁ = ZP 326 326. copy_slot_unmasked f4₁ = F42 327 327. copy_constant $24 = colorGreen(0) 328 328. copy_constant $25 = 0x40000000 (2.0) 329 329. add_float $24 += $25 330 330. copy_slot_unmasked two = $24 331 331. copy_4_slots_unmasked a₁ = f1₁, f2₁, f3₁, f4₁ 332 332. copy_slot_unmasked $24 = f1₁ 333 333. copy_slot_unmasked $25 = two 334 334. mul_float $24 *= $25 335 335. copy_slot_unmasked $25 = f2₁ 336 336. copy_slot_unmasked $26 = two 337 337. mul_float $25 *= $26 338 338. copy_slot_unmasked $26 = f3₁ 339 339. copy_slot_unmasked $27 = two 340 340. mul_float $26 *= $27 341 341. copy_slot_unmasked $27 = f4₁ 342 342. copy_slot_unmasked $28 = two 343 343. mul_float $27 *= $28 344 344. copy_4_slots_unmasked b₁ = $24..27 345 345. store_condition_mask $55 = CondMask 346 346. copy_slot_unmasked $56 = eq₁ 347 347. copy_4_slots_unmasked $24..27 = a₁ 348 348. copy_4_slots_unmasked $28..31 = b₁ 349 349. cmpne_4_floats $24..27 = notEqual($24..27, $28..31) 350 350. bitwise_or_2_ints $24..25 |= $26..27 351 351. bitwise_or_int $24 |= $25 352 352. merge_condition_mask CondMask = $55 & $56 353 353. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 20 at #360) 354 354. copy_4_slots_unmasked $25..28 = a₁ 355 355. copy_4_slots_unmasked $29..32 = b₁ 356 356. cmpeq_4_floats $25..28 = equal($25..28, $29..32) 357 357. bitwise_and_2_ints $25..26 &= $27..28 358 358. bitwise_and_int $25 &= $26 359 359. copy_slot_masked $24 = Mask($25) 360 360. label label 0x00000014 361 361. load_condition_mask CondMask = $55 362 362. copy_slot_masked [test_diff_vectors].result = Mask($24) 363 363. label label 0x00000013 364 364. copy_slot_masked $23 = Mask($24) 365 365. label label 0x00000002 366 366. load_condition_mask CondMask = $44 367 367. zero_slot_unmasked $0 = 0 368 368. merge_condition_mask CondMask = $22 & $23 369 369. branch_if_no_active_lanes branch_if_no_active_lanes +45 (label 1 at #414) 370 370. copy_slot_unmasked eq₁ = EQ 371 371. copy_slot_unmasked f1₁ = NAN1 372 372. copy_slot_unmasked f2₁ = ZM 373 373. copy_slot_unmasked f3₁ = ZP 374 374. copy_slot_unmasked f4₁ = F42 375 375. copy_constant $1 = colorGreen(0) 376 376. copy_constant $2 = 0x40000000 (2.0) 377 377. add_float $1 += $2 378 378. copy_slot_unmasked two = $1 379 379. copy_4_slots_unmasked a₁ = f1₁, f2₁, f3₁, f4₁ 380 380. copy_slot_unmasked $1 = f1₁ 381 381. copy_slot_unmasked $2 = two 382 382. mul_float $1 *= $2 383 383. copy_slot_unmasked $2 = f2₁ 384 384. copy_slot_unmasked $3 = two 385 385. mul_float $2 *= $3 386 386. copy_slot_unmasked $3 = f3₁ 387 387. copy_slot_unmasked $4 = two 388 388. mul_float $3 *= $4 389 389. copy_slot_unmasked $4 = f4₁ 390 390. copy_slot_unmasked $5 = two 391 391. mul_float $4 *= $5 392 392. copy_4_slots_unmasked b₁ = $1..4 393 393. store_condition_mask $44 = CondMask 394 394. copy_slot_unmasked $45 = eq₁ 395 395. copy_4_slots_unmasked $1..4 = a₁ 396 396. copy_4_slots_unmasked $5..8 = b₁ 397 397. cmpne_4_floats $1..4 = notEqual($1..4, $5..8) 398 398. bitwise_or_2_ints $1..2 |= $3..4 399 399. bitwise_or_int $1 |= $2 400 400. merge_condition_mask CondMask = $44 & $45 401 401. branch_if_no_active_lanes branch_if_no_active_lanes +7 (label 22 at #408) 402 402. copy_4_slots_unmasked $2..5 = a₁ 403 403. copy_4_slots_unmasked $6..9 = b₁ 404 404. cmpeq_4_floats $2..5 = equal($2..5, $6..9) 405 405. bitwise_and_2_ints $2..3 &= $4..5 406 406. bitwise_and_int $2 &= $3 407 407. copy_slot_masked $1 = Mask($2) 408 408. label label 0x00000016 409 409. load_condition_mask CondMask = $44 410 410. copy_slot_masked [test_diff_vectors].result = Mask($1) 411 411. label label 0x00000015 412 412. bitwise_not_int $1 = ~$1 413 413. copy_slot_masked $0 = Mask($1) 414 414. label label 0x00000001 415 415. load_condition_mask CondMask = $22 416 416. swizzle_4 $0..3 = ($0..3).xxxx 417 417. copy_4_constants $4..7 = colorRed 418 418. copy_4_constants $8..11 = colorGreen 419 419. mix_4_ints $0..3 = mix($4..7, $8..11, $0..3) 420 420. copy_4_slots_unmasked [main].result = $0..3 421 421. load_src src.rgba = [main].result 422