/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

//----------------------------------------------------------------------------
// Flat Intel GEN EU listing: straight-line code with jmpi branches, no
// call/return.  Sub-register numbers are in units of the operand type, so
// r0.2:uw / r0.3:uw are the low / high words of r0.1:ud, r9.4:uw / r9.5:uw
// are the two words of r9.2:ud, and r9.10:uw / r9.11:uw are the two words
// of r9.5:ud.
//
// Control flow by label:
//   L0     unpack two 12-bit fields of r0.1 into r0.2:uw / r0.3:uw, then
//          branch to L304.
//   L128   read r2..r9 back through a send (not reached from L0, see note).
//   L304   store r0.1 in r9.3, then write r2..r9 out through a send.
//   L576   scale the two coordinates by 16; end the thread (L704) when
//          either coordinate + 15 is below its limit in r9.10 / r9.11.
//   L736   clamp the coordinates, build the 16-word mask in r17, round
//          r9.4:uw / r9.5:uw to a multiple of 16.
//   L1120..L1888   float update of r6 and r9.0; executed only when
//          r2.26:ub == 1.
//   L1920  repack r7 / r8, decode eight 3-bit fields of r2.2:ud, and
//          accumulate into r6 and r5.
//
// Execution runs off the end of the listing after the last mac; presumably
// more code is appended after it -- TODO confirm.
//
// NOTE(review): the coordinate / mask / mode vocabulary used in the comments
// below is inferred from the arithmetic only; the inputs in r2..r9 are
// produced outside this listing.
//----------------------------------------------------------------------------

L0:
                mov (1|M0) r19.4<1>:uw 0x0:uw
                // Copy of r0.  On the L304 path r27 is overwritten with r7
                // before any read in this listing.
                mov (8|M0) r27.0<1>:ud r0.0<8;8,1>:ud
                // Split r0.1[23:0] in place: bits 11:0 -> r0.2:uw,
                // bits 23:12 -> r0.3:uw.
                and (1|M0) r13.0<1>:ud r0.1<0;1,0>:ud 0xFFFFFF:ud
                and (1|M0) r0.2<1>:uw r13.0<0;1,0>:uw 0xFFF:uw
                shr (1|M0) r13.0<1>:ud r13.0<0;1,0>:ud 0xC:uw
                mov (1|M0) r0.3<1>:uw r13.0<0;1,0>:uw
                // NOTE(review): r19.4 was set to 0 four instructions above,
                // so this compare is always true when entered at L0 and
                // L128 is only reachable by a jump from outside this
                // listing -- confirm that is intended.
                cmp (1|M0) (eq)f0.0 null.0<1>:uw r19.4<0;1,0>:uw 0x0:uw
(W&f0.0)        jmpi L304
L128:
                // r14.0..7:w = {0,16,...,112}
                mov (8|M0) r14.0<1>:w 0x76543210:v
                shl (8|M0) r14.0<1>:w r14.0<8;8,1>:w 4:w
                // r20 words = 0x400 + 16*i (i = 0..7) then 0x480 + 16*i;
                // r21 = r20 + 0x100 in every word.
                mov (4|M0) r20.0<1>:ud 0x4000400:ud
                mov (4|M0) r20.4<1>:ud 0x4800480:ud
                add (8|M0) r20.0<1>:uw r20.0<8;8,1>:uw r14.0<8;8,1>:w
                add (8|M0) r20.8<1>:uw r20.8<8;8,1>:uw r14.0<8;8,1>:w
                add (8|M0) r21.0<1>:ud r20.0<8;8,1>:ud 0x1000100:ud
                // NOTE(review): under the usual GEN send-descriptor layout
                // (bits 28:25 message length, 24:20 response length) this
                // decodes to 2 payload registers (r20, r21) and 8 returned
                // registers (r2..r9) -- confirm against the PRM.
                send (16|M0) r2 r20:ub 0xC 0x048050FE
                // Reload r0.1 from the value L304 keeps in r9.3, then add 1
                // to its low word.
                mov (1|M0) r0.1<1>:ud r9.3<0;1,0>:ud
                add (1|M0) r0.2<1>:uw r0.2<0;1,0>:uw 0x1:uw
(W)             jmpi L576
L304:
                // Keep the unpacked r0.1 in r9.3 so it is part of the data
                // written below.
                mov (1|M0) r9.3<1>:ud r0.1<0;1,0>:ud
                // Same r20 / r21 offset pattern as at L128.
                mov (8|M0) r14.0<1>:w 0x76543210:v
                shl (8|M0) r14.0<1>:w r14.0<8;8,1>:w 4:w
                mov (4|M0) r20.0<1>:ud 0x4000400:ud
                mov (4|M0) r20.4<1>:ud 0x4800480:ud
                add (8|M0) r20.0<1>:uw r20.0<8;8,1>:uw r14.0<8;8,1>:w
                add (8|M0) r20.8<1>:uw r20.8<8;8,1>:uw r14.0<8;8,1>:w
                add (8|M0) r21.0<1>:ud r20.0<8;8,1>:ud 0x1000100:ud
                // Payload data: r22..r29 = r2..r9.
                mov (8|M0) r22.0<1>:ud r2.0<8;8,1>:ud
                mov (8|M0) r23.0<1>:ud r3.0<8;8,1>:ud
                mov (8|M0) r24.0<1>:ud r4.0<8;8,1>:ud
                mov (8|M0) r25.0<1>:ud r5.0<8;8,1>:ud
                mov (8|M0) r26.0<1>:ud r6.0<8;8,1>:ud
                mov (8|M0) r27.0<1>:ud r7.0<8;8,1>:ud
                mov (8|M0) r28.0<1>:ud r8.0<8;8,1>:ud
                mov (8|M0) r29.0<1>:ud r9.0<8;8,1>:ud
                // NOTE(review): decodes to 10 payload registers (r20..r29)
                // and no response under the same descriptor layout as the
                // read at L128 -- confirm against the PRM.
                send (16|M0) null:w r20:ub 0xC 0x140250FE
L576:
                // All-ones defaults; r16 / r15 keep 0xFFFF in every word
                // whose predicate below is false.
                mov (8|M0) r17.0<1>:ud 0xFFFFFFFF:ud
                mov (8|M0) r16.0<1>:ud 0xFFFFFFFF:ud
                mov (8|M0) r15.0<1>:ud 0xFFFFFFFF:ud
                // r0.2, r0.3 *= 16; r13.0, r13.1 = those values + 15.
                shl (2|M0) r0.2<1>:uw r0.2<2;2,1>:uw 0x4:uw
                add (2|M0) r13.0<1>:w r0.2<2;2,1>:uw 0xF:uw
                // Leave through L704 when either value + 15 is below the
                // matching word of r9.10 / r9.11.
                cmp (2|M0) (lt)f0.0 null.0<1>:uw r13.0<2;2,1>:uw r9.10<2;2,1>:uw
(W&f0.0.any2h)  jmpi L704
L688:
(W)             jmpi L736
L704:
                // End of thread: r127 = r0 is the message payload.
                mov (8|M0) r127.0<1>:ud r0.0<8;8,1>:ud
                send (1|M0) null:d r127:ub 0x27 0x02000010 {EOT} // SPAWNER wr:1, rd:0, fc: 0x10
L736:
                // Per channel: when r0.2 < r9.10 <= r0.2 + 15, replace r0.2
                // with r9.10 (same for the r0.3 / r9.11 pair).
                cmp (2|M0) (lt)f0.0 null.0<1>:uw r0.2<2;2,1>:uw r9.10<2;2,1>:uw
(f0.0)          cmp (2|M0) (ge)f0.0 null.0<1>:uw r13.0<2;2,1>:uw r9.10<2;2,1>:uw
(f0.0)          mov (2|M0) r0.2<1>:uw r9.10<2;2,1>:uw
                // The <0;2,1> region repeats the (r0.2, r0.3) pair across
                // all 16 channels.
                // Where pair < r7[i]:  r16[i] = 0xFFFF << min(r7[i] - pair, 16)
                cmp (16|M0) (lt)f0.0 null.0<1>:w r0.2<0;2,1>:w r7.0<16;16,1>:w
(f0.0)          subb (16|M0) r16.0<1>:w r7.0<16;16,1>:w r0.2<0;2,1>:w {AccWrEn}
                cmp (16|M0) (gt)f0.1 null.0<1>:w r16.0<16;16,1>:w 16:w
(f0.1)          mov (16|M0) r16.0<1>:w 16:w
(f0.0)          shl (16|M0) r16.0<1>:uw r17.0<16;16,1>:uw r16.0<16;16,1>:uw
                // Where pair + 15 > r8[i]:
                //   r15[i] = 0xFFFF >> min(pair + 15 - r8[i], 16)
                add (16|M0) r13.0<1>:w r0.2<0;2,1>:uw 0xF:uw
                cmp (16|M0) (gt)f0.0 null.0<1>:w r13.0<16;16,1>:w r8.0<16;16,1>:w
(f0.0)          subb (16|M0) r14.0<1>:w r13.0<16;16,1>:w r8.0<16;16,1>:w
                cmp (16|M0) (gt)f0.1 null.0<1>:w r14.0<16;16,1>:w 16:w
(f0.1)          mov (16|M0) r14.0<1>:w 16:w
(f0.0)          shr (16|M0) r15.0<1>:uw r17.0<16;16,1>:uw r14.0<16;16,1>:uw
                // r17 = combined 16-word mask.
                and (16|M0) r17.0<1>:uw r16.0<16;16,1>:uw r15.0<16;16,1>:uw
                // Round r9.4:uw and r9.5:uw to the nearest multiple of 16:
                // ((v + 8) >> 4) << 4.
                add (1|M0) r9.4<1>:uw r9.4<0;1,0>:uw 0x8:uw
                shr (1|M0) r9.4<1>:uw r9.4<0;1,0>:uw 0x4:uw
                shl (1|M0) r9.4<1>:uw r9.4<0;1,0>:uw 0x4:uw
                add (1|M0) r9.5<1>:uw r9.5<0;1,0>:uw 0x8:uw
                shr (1|M0) r9.5<1>:uw r9.5<0;1,0>:uw 0x4:uw
                shl (1|M0) r9.5<1>:uw r9.5<0;1,0>:uw 0x4:uw
                // NOTE(review): r8.3:f / r8.4:f are overwritten at L1920
                // before any read in this listing -- confirm whether this
                // conversion is still needed.
                mov (2|M0) r8.3<1>:f r9.4<2;2,1>:w
                // The L1120..L1888 section runs only when r2.26:ub == 1.
                cmp (1|M0) (eq)f0.0 null.0<1>:w r2.26<0;1,0>:ub 0x1:uw
(W&~f0.0)       jmpi L1920
L1120:
                // r8.5:f = float(r0.2 >> 4), r13.6:f = float(r0.2)
                shr (1|M0) r14.6<1>:w r0.2<0;1,0>:w 4:w
                mov (1|M0) r14.6<1>:f r14.6<0;1,0>:w
                mov (1|M0) r8.5<1>:f r14.6<0;1,0>:w
                mov (1|M0) r13.6<1>:f r0.2<0;1,0>:w
                // r13.0:w / r13.1:w = r9.4 >> 4 / r9.5 >> 4, with float
                // copies in r13.1:f / r13.2:f.
                shr (2|M0) r13.0<1>:w r9.4<2;2,1>:w 4:w
                mov (2|M0) r13.1<1>:f r13.0<2;2,1>:w
                // f0.0 = (r0.2 < r9.4).  When false, cap the two operands
                // at r9.4 >> 4 and float(r9.4).
                cmp (1|M0) (lt)f0.0 null.0<1>:w r0.2<0;1,0>:w r9.4<0;1,0>:w
(~f0.0)         mov (1|M0) r14.6<1>:w r13.0<0;1,0>:w
(~f0.0)         mov (1|M0) r13.6<1>:f r9.4<0;1,0>:w
                // r14.5 = r13.6 * (r9.0 + (r14.6:w - 1) * r9.1 * 0.5)
                subb (1|M0) r14.7<1>:d r14.6<0;1,0>:w 1:w
                mov (1|M0) r14.5<1>:f r14.7<0;1,0>:d
                mul (1|M0) r14.5<1>:f r14.5<0;1,0>:f r9.1<0;1,0>:f
                mul (1|M0) r14.5<1>:f r14.5<0;1,0>:f 0.5:f
                add (1|M0) r14.5<1>:f r14.5<0;1,0>:f r9.0<0;1,0>:f
                mul (1|M0) r14.5<1>:f r13.6<0;1,0>:f r14.5<0;1,0>:f
                // r16 = r14.5 * r3 (8 floats)
                mul (8|M0) r16.0<1>:f r14.5<0;1,0>:f r3.0<8;8,1>:f
(W&~f0.0)       jmpi L1456
L1392:
                // r0.2 < r9.4:  r9.0 += r8.5 * r9.1
                mov (1|M0) acc0.0<1>:f r9.0<0;1,0>:f
                mac (1|M0) r14.0<1>:f r8.5<0;1,0>:f r9.1<0;1,0>:f
                mov (1|M0) r9.0<1>:f r14.0<0;1,0>:f
(W)             jmpi L1888
L1456:
                // r14.6:w = min(r0.2, r9.5) - r9.4
                cmp (1|M0) (lt)f0.0 null.0<1>:w r0.2<0;1,0>:w r9.5<0;1,0>:w
(~f0.0)         mov (1|M0) r14.6<1>:w r9.5<0;1,0>:w
(f0.0)          mov (1|M0) r14.6<1>:w r0.2<0;1,0>:w
                subb (1|M0) r14.6<1>:w r14.6<0;1,0>:w r9.4<0;1,0>:w
                mov (1|M0) r14.7<1>:f r14.6<0;1,0>:w
                // NOTE(review): r9.3 was overwritten with the integer
                // r0.1:ud at L304 and is read as :f here and at L1600 --
                // confirm that is intended.
                mul (1|M0) r14.7<1>:f r9.3<0;1,0>:f r14.7<0;1,0>:f
                // r16 += r14.7 * r3
                mul (8|M0) r15.0<1>:f r14.7<0;1,0>:f r3.0<8;8,1>:f
                add (8|M0) r16.0<1>:f r15.0<8;8,1>:f r16.0<8;8,1>:f
(W&~f0.0)       jmpi L1632
L1600:
                // r9.4 <= r0.2 < r9.5:  r9.0 = r9.3
                mov (1|M0) r9.0<1>:f r9.3<0;1,0>:f
(W)             jmpi L1888
L1632:
                // r0.2 >= r9.5.
                // r14.6:w = (r9.5 >> 4) - (r0.2 >> 4)
                shr (1|M0) r14.6<1>:w r0.2<0;1,0>:w 4:w
                subb (1|M0) r14.6<1>:w r13.1<0;1,0>:w r14.6<0;1,0>:w
                // r13.5 = float(r0.2 - r9.5)
                //         * (r9.4:f + (r14.6:w + 1) * r9.1 * 0.5)
                add (1|M0) r14.7<1>:d r14.6<0;1,0>:w 1:w
                mov (1|M0) r13.5<1>:f r14.7<0;1,0>:d
                mul (1|M0) r13.5<1>:f r13.5<0;1,0>:f r9.1<0;1,0>:f
                mul (1|M0) r13.5<1>:f r13.5<0;1,0>:f 0.5:f
                add (1|M0) r13.5<1>:f r13.5<0;1,0>:f r9.4<0;1,0>:f
                subb (1|M0) r13.6<1>:w r0.2<0;1,0>:w r9.5<0;1,0>:w
                mov (1|M0) r13.6<1>:f r13.6<0;1,0>:w
                mul (1|M0) r13.5<1>:f r13.6<0;1,0>:f r13.5<0;1,0>:f
                // r16 += r13.5 * r3
                mul (8|M0) r15.0<1>:f r13.5<0;1,0>:f r3.0<8;8,1>:f
                add (8|M0) r16.0<1>:f r16.0<8;8,1>:f r15.0<8;8,1>:f
                // r9.0 = r9.4:f + float(r14.6:w) * r9.1
                mov (1|M0) r14.6<1>:f r14.6<0;1,0>:w
                mov (1|M0) acc0.0<1>:f r9.4<0;1,0>:f
                mac (1|M0) r14.0<1>:f r14.6<0;1,0>:f r9.1<0;1,0>:f
                mov (1|M0) r9.0<1>:f r14.0<0;1,0>:f
L1888:
                // r6 += r16; r9.1 = 0.0
                add (8|M0) r6.0<1>:f r6.0<8;8,1>:f r16.0<8;8,1>:f
                mov (1|M0) r9.1<1>:f 0.0:f
L1920:
                // Repack: r7.0 = r0.1; the eight mask dwords of r17 go to
                // r7.1..r7.3 and r8.0..r8.4; r7.4 / r7.5 = r9.0 / r9.1;
                // r7.6 = r7.7 = 0.
                mov (1|M0) r7.0<1>:ud r0.1<0;1,0>:ud
                mov (2|M0) r7.1<1>:ud r17.0<2;2,1>:ud
                mov (1|M0) r7.3<1>:ud r17.2<0;1,0>:ud
                mov (4|M0) r8.0<1>:ud r17.3<4;4,1>:ud
                mov (1|M0) r8.4<1>:ud r17.7<0;1,0>:ud
                mov (2|M0) r7.4<1>:f r9.0<2;2,1>:f
                mov (1|M0) r7.6<1>:ud 0x0:ud
                mov (1|M0) r7.7<1>:ud 0x0:ud
                mov (8|M0) r25.0<1>:ud r0.0<8;8,1>:ud
                mov (8|M0) r26.0<1>:ud r0.0<8;8,1>:ud
                // r3 *= r7.4
                mul (8|M0) r3.0<1>:f r3.0<8;8,1>:f r7.4<0;1,0>:f
                // r8.5 / r8.6 = float of the two words of r7.0 (= r0.2, r0.3)
                mov (2|M0) r8.5<1>:f r7.0<2;2,1>:w
                // f0.0 stays live until the final r6 update; everything in
                // between uses f0.1 only.
                cmp (8|M0) (eq)f0.0 null.0<1>:w r2.26<0;1,0>:ub 0x1:uw
                // r13.i:uw = bits [3i+2 : 3i] of r2.2:ud, i = 0..7
                // (presumably one 3-bit mode per channel -- TODO confirm).
                shr (1|M0) r13.0<1>:uw r2.2<0;1,0>:ud 0x0:ud
                shr (1|M0) r13.1<1>:uw r2.2<0;1,0>:ud 0x3:ud
                shr (1|M0) r13.2<1>:uw r2.2<0;1,0>:ud 0x6:ud
                shr (1|M0) r13.3<1>:uw r2.2<0;1,0>:ud 0x9:ud
                shr (1|M0) r13.4<1>:uw r2.2<0;1,0>:ud 0xC:ud
                shr (1|M0) r13.5<1>:uw r2.2<0;1,0>:ud 0xF:ud
                shr (1|M0) r13.6<1>:uw r2.2<0;1,0>:ud 0x12:ud
                shr (1|M0) r13.7<1>:uw r2.2<0;1,0>:ud 0x15:ud
                and (8|M0) r13.0<1>:uw r13.0<8;8,1>:uw 0x7:uw
                // Default (mode 0): r17 = r8.5, r15 = r8.6
                mov (8|M0) r17.0<1>:f r8.5<0;1,0>:f
                mov (8|M0) r15.0<1>:f r8.6<0;1,0>:f
                // Mode 1: r17 = r8.6, r15 = r2.0:uw - r8.5 - 16
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x11111111:uv
(f0.1)          mov (8|M0) r17.0<1>:f r8.6<0;1,0>:f
(f0.1)          mov (8|M0) r14.0<1>:f r2.0<0;1,0>:uw
(f0.1)          add (8|M0) r15.0<1>:f -r8.5<0;1,0>:f r14.0<8;8,1>:f
(f0.1)          add (8|M0) r15.0<1>:f r15.0<8;8,1>:f -16.0:f
                // Mode 2: r17 = r2.0:uw - r8.5 - 16,
                //         r15 = r2.1:uw - r8.6 - 16
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x22222222:uv
(f0.1)          mov (8|M0) r16.0<1>:f r2.0<0;1,0>:uw
(f0.1)          add (8|M0) r17.0<1>:f -r8.5<0;1,0>:f r16.0<8;8,1>:f
(f0.1)          add (8|M0) r17.0<1>:f r17.0<8;8,1>:f -16.0:f
(f0.1)          mov (8|M0) r14.0<1>:f r2.1<0;1,0>:uw
(f0.1)          add (8|M0) r15.0<1>:f -r8.6<0;1,0>:f r14.0<8;8,1>:f
(f0.1)          add (8|M0) r15.0<1>:f r15.0<8;8,1>:f -16.0:f
                // Mode 3: r17 = r2.1:uw - r8.6 - 16, r15 = r8.5
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x33333333:uv
(f0.1)          mov (8|M0) r16.0<1>:f r2.1<0;1,0>:uw
(f0.1)          add (8|M0) r17.0<1>:f -r8.6<0;1,0>:f r16.0<8;8,1>:f
(f0.1)          add (8|M0) r17.0<1>:f r17.0<8;8,1>:f -16.0:f
(f0.1)          mov (8|M0) r15.0<1>:f r8.5<0;1,0>:f
                // Mode 4: r17 = r2.0:uw - r8.5 - 16, r15 = r8.6
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x44444444:uv
(f0.1)          mov (8|M0) r16.0<1>:f r2.0<0;1,0>:uw
(f0.1)          add (8|M0) r17.0<1>:f -r8.5<0;1,0>:f r16.0<8;8,1>:f
(f0.1)          add (8|M0) r17.0<1>:f r17.0<8;8,1>:f -16.0:f
(f0.1)          mov (8|M0) r15.0<1>:f r8.6<0;1,0>:f
                // Mode 7: r17 = r8.6, r15 = r8.5
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x77777777:uv
(f0.1)          mov (8|M0) r17.0<1>:f r8.6<0;1,0>:f
(f0.1)          mov (8|M0) r15.0<1>:f r8.5<0;1,0>:f
                // Mode 5: r17 = r8.5, r15 = r2.1:uw - r8.6 - 16
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x55555555:uv
(f0.1)          mov (8|M0) r17.0<1>:f r8.5<0;1,0>:f
(f0.1)          mov (8|M0) r14.0<1>:f r2.1<0;1,0>:uw
(f0.1)          add (8|M0) r15.0<1>:f -r8.6<0;1,0>:f r14.0<8;8,1>:f
(f0.1)          add (8|M0) r15.0<1>:f r15.0<8;8,1>:f -16.0:f
                // Mode 6: r17 = r2.1:uw - r8.6 - 16,
                //         r15 = r2.0:uw - r8.5 - 16
                cmp (8|M0) (eq)f0.1 null.0<1>:w r13.0<8;8,1>:uw 0x66666666:uv
(f0.1)          mov (8|M0) r16.0<1>:f r2.1<0;1,0>:uw
(f0.1)          add (8|M0) r17.0<1>:f -r8.6<0;1,0>:f r16.0<8;8,1>:f
(f0.1)          add (8|M0) r17.0<1>:f r17.0<8;8,1>:f -16.0:f
(f0.1)          mov (8|M0) r14.0<1>:f r2.0<0;1,0>:uw
(f0.1)          add (8|M0) r15.0<1>:f -r8.5<0;1,0>:f r14.0<8;8,1>:f
(f0.1)          add (8|M0) r15.0<1>:f r15.0<8;8,1>:f -16.0:f
                // r6 += r3 * r17, skipped in channels where r2.26:ub == 1
                // (f0.0 from the compare above).
(~f0.0)         mov (8|M0) acc0.0<1>:f r6.0<8;8,1>:f
(~f0.0)         mac (8|M0) r6.0<1>:f r3.0<8;8,1>:f r17.0<8;8,1>:f
                // r5 += r4 * r15
                mov (8|M0) acc0.0<1>:f r5.0<8;8,1>:f
                mac (8|M0) r5.0<1>:f r4.0<8;8,1>:f r15.0<8;8,1>:f