<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright © 2020 Google, Inc.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
 -->

<isa>

<!--
	Cat7 Instructions:  barrier, cache, sleep instructions
 -->

<!--
	Common base for the cat7 encodings below: bits 61..63 are fixed to 111,
	and the (ss)/(jp)/(sy) flags live at bits 44, 59 and 60. Bits 45..58 are
	left for the derived bitsets to define.
 -->
<bitset name="#instruction-cat7" extends="#instruction">
	<pattern low="0" high="31">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</pattern>
	<pattern low="32" high="43">xxxxxxxxxxxx</pattern>
	<field pos="44" name="SS" type="bool" display="(ss)"/>
	<field pos="59" name="JP" type="bool" display="(jp)"/>
	<field pos="60" name="SY" type="bool" display="(sy)"/>
	<pattern low="61" high="63">111</pattern>  <!-- cat7 -->
</bitset>

<!--
	Shared layout for bar/fence: four independent flag bits (51..54) that are
	printed as suffixes in the order .g .l .r .w.
 -->
<bitset name="#instruction-cat7-barrier" extends="#instruction-cat7">
	<display>
		{SY}{SS}{JP}{NAME}{G}{L}{R}{W}
	</display>
	<pattern low="45" high="50">x1xxxx</pattern>
	<field pos="51" name="W" type="bool" display=".w"/>  <!-- write -->
	<field pos="52" name="R" type="bool" display=".r"/>  <!-- read -->
	<field pos="53" name="L" type="bool" display=".l"/>  <!-- local -->
	<field pos="54" name="G" type="bool" display=".g"/>  <!-- global -->
	<encode>
		<map name="W">src->cat7.w</map>
		<map name="R">src->cat7.r</map>
		<map name="L">src->cat7.l</map>
		<map name="G">src->cat7.g</map>
	</encode>
</bitset>

<!-- Opcode (bits 55..58) 0000 -->
<bitset name="bar" extends="#instruction-cat7-barrier">
	<pattern low="55" high="58">0000</pattern>
</bitset>

<!-- Opcode (bits 55..58) 0001 -->
<bitset name="fence" extends="#instruction-cat7-barrier">
	<pattern low="55" high="58">0001</pattern>
</bitset>

<!-- Suffix printed for the TYPE bit of the cat7 data instructions. -->
<enum name="#dccln-type">
	<value val="0" display=".shr"/>
	<value val="1" display=".all"/>
</enum>

<!--
	Shared layout for dccln/dcinv/dcflu. Bit 54 is fixed to 0 here.
	The encoder currently always emits TYPE=1 (".all"); see the TODO below.
 -->
<bitset name="#instruction-cat7-data" extends="#instruction-cat7">
	<display>
		{SY}{SS}{JP}{NAME}{TYPE}
	</display>
	<pattern low="45" high="50">xxxxxx</pattern>
	<field pos="51" name="TYPE" type="#dccln-type"/>
	<pattern low="52" high="54">xx0</pattern>
	<encode>
		<!-- TODO: read handle type -->
		<map name="TYPE">1</map>
	</encode>
</bitset>

<enum name="#sleep-duration">
	<value val="0" display=".s">
		<doc>
			Short sleep
		</doc>
	</value>
	<value val="1" display=".l">
		<doc>
			Long sleep, around 20x longer than short
		</doc>
	</value>
</enum>

<!-- The encoder currently always emits DURATION=1 (".l"); see the TODO below. -->
<bitset name="sleep" extends="#instruction-cat7">
	<doc>
		Short/Long Sleep
		TODO: how is it different from a bunch of nops?
	</doc>
	<display>
		{SY}{SS}{JP}{NAME}{DURATION}
	</display>
	<pattern low="45" high="50">xxxxxx</pattern>
	<field pos="51" name="DURATION" type="#sleep-duration"/>
	<pattern low="52" high="54">xxx</pattern>
	<pattern low="55" high="58">0010</pattern>
	<encode>
		<!-- TODO: read duration -->
		<map name="DURATION">1</map>
	</encode>
</bitset>

<bitset name="icinv" extends="#instruction-cat7">
	<doc>
		Seems to be Instruction Cache Invalidate, supported by the fact
		that it considerably slows shader execution compared to
		data cache instructions.
	</doc>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">xxxxxxxxxx</pattern>
	<pattern low="55" high="58">0011</pattern>
</bitset>

<bitset name="dccln" extends="#instruction-cat7-data">
	<doc>
		Data (Cache?) Clean
	</doc>
	<pattern low="55" high="58">0100</pattern>
</bitset>

<bitset name="dcinv" extends="#instruction-cat7-data">
	<doc>
		Data (Cache?) Invalidate
	</doc>
	<pattern low="55" high="58">0101</pattern>
</bitset>

<bitset name="dcflu" extends="#instruction-cat7-data">
	<doc>
		Data (Cache?) Flush
	</doc>
	<pattern low="55" high="58">0110</pattern>
</bitset>

<!--
	Shares opcode bits 55..58 (0101) with dcinv; the two differ in bit 54,
	which is 1 here and fixed to 0 in #instruction-cat7-data.
 -->
<bitset name="ccinv" extends="#instruction-cat7">
	<doc>
		_Presumably_ invalidates workgroup-wide cache for image/buffer data access.
		So while "fence" is enough to synchronize data access inside a workgroup,
		for cross-workgroup synchronization we have to invalidate that cache.
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="50">x1xxxx</pattern>
	<pattern low="51" high="54">1010</pattern>
	<pattern low="55" high="58">0101</pattern>
</bitset>

<!-- lock and unlock share opcode bits 55..58 (0111) and differ only in bits 45..54. -->
<bitset name="lock" extends="#instruction-cat7">
	<doc>
		Seen at the end of compute shaders:
			(sy)(jp)lock;
			unlock;
			end ;
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">1000010000</pattern>
	<pattern low="55" high="58">0111</pattern>
</bitset>

<bitset name="unlock" extends="#instruction-cat7">
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">1001010000</pattern>
	<pattern low="55" high="58">0111</pattern>
</bitset>

<!--
	Immediate source of "alias". How the immediate is printed depends on the
	TYPE and TYPE_SIZE parameters passed in by the parent: float vs. hex, and
	16-bit (wrapped in h(...)) vs. 32-bit. The trailing display/field pair is
	the fallback used when none of the overrides match (32-bit hex).
	NOTE(review): the b16 override only tests TYPE_SIZE, so it presumably
	relies on overrides being tried in document order; confirm against isaspec.
 -->
<bitset name="#alias-immed-src" size="32">
	<override>
		<expr>
			{TYPE} == 0 &amp;&amp; {TYPE_SIZE} == 0 /* f16 */
		</expr>
		<display>
			h({IMMED})
		</display>
		<field name="IMMED" low="0" high="15" type="float"/>
	</override>
	<override>
		<expr>
			{TYPE} == 0 &amp;&amp; {TYPE_SIZE} == 1 /* f32 */
		</expr>
		<display>
			({IMMED})
		</display>
		<field name="IMMED" low="0" high="31" type="float"/>
	</override>
	<override>
		<expr>
			{TYPE_SIZE} == 0 /* b16 */
		</expr>
		<display>
			h(0x{IMMED})
		</display>
		<field name="IMMED" low="0" high="15" type="hex"/>
	</override>

	<display>
		(0x{IMMED})
	</display>

	<field name="IMMED" low="0" high="31" type="hex"/>
	<encode type="struct ir3_register *">
		<map name="IMMED">extract_reg_uim(src)</map>
	</encode>
</bitset>

<!-- Const-register source of "alias"; prefixed with "h" when TYPE_SIZE is 0. -->
<bitset name="#alias-const-src" size="11">
	<display>
		{HALF}{CONST}
	</display>
	<field name="CONST" low="0" high="10" type="#reg-const"/>
	<derived name="HALF" type="bool" display="h">
		<expr>
			({TYPE_SIZE} == 0) /* b16 */
		</expr>
	</derived>
	<encode type="struct ir3_register *">
		<map name="CONST">src</map>
	</encode>
</bitset>

<!-- GPR source of "alias"; prefixed with "h" when TYPE_SIZE is 0. -->
<bitset name="#alias-gpr-src" size="8">
	<display>
		{HALF}{SRC}
	</display>
	<field name="SRC" low="0" high="7" type="#reg-gpr"/>
	<derived name="HALF" type="bool" display="h">
		<expr>
			({TYPE_SIZE} == 0) /* b16 */
		</expr>
	</derived>
	<encode type="struct ir3_register *">
		<map name="SRC">src</map>
	</encode>
</bitset>

<!-- Values 2 and 3 are both displayed as "mem". -->
<enum name="#alias-scope">
	<value val="0" display="tex"/>
	<value val="1" display="rt"/>
	<value val="2" display="mem"/>
	<value val="3" display="mem"/>
</enum>

<enum name="#alias-type-size">
	<value val="0" display="16"/>
	<value val="1" display="32"/>
</enum>

<enum name="#alias-type">
	<doc>
		The type (float or int) of sources. This seems to have no
		functional effect and only changes how immediates are displayed.
		Note that the blob uses i16/i32 when the scope is rt or the 2nd
		mem, but this is not implemented here. Also note that the blob
		uses b16/b32 for alias.tex when the type is 0, even though it
		still prints immediates as floats in that case.
	</doc>
	<value val="0" display="f"/>
	<value val="1" display="b"/>
</enum>

<enum name="#alias-src-reg-type">
	<value val="0" display="GPR"/>
	<value val="1" display="CONST"/>
	<value val="2" display="IMMED"/>
</enum>

<!--
	Render-target destination of alias.rt, printed as rt{RT}.{SWIZ}.
	The register number packs the swizzle in its low two bits and the
	render-target index in the bits above them.
 -->
<bitset name="#dst-rt" size="5">
	<display>
		rt{RT}.{SWIZ}
	</display>
	<field name="SWIZ" low="0" high="1" type="#swiz"/>
	<field name="RT" low="2" high="4" type="uint"/>
	<encode type="struct ir3_register *">
		<map name="RT">src->num >> 2</map>
		<map name="SWIZ">src->num &amp; 0x3</map>
	</encode>
</bitset>

<bitset name="alias" extends="#instruction">
	<doc>
		For alias.tex (and possibly alias.mem):

		Add an entry to the scope-specific "alias table", when instruction
		from that scope tries to access a source register it would search
		its alias table first.

		This allows to reduce the amount of data passed around when reading
		immediates/constants and reduce register pressure. In addition,
		the alias table could be populated in the preamble further reducing
		the amount of instructions being run.

		Used like this:
			alias.tex.b32.1 r40.x, (-1.456763);
			alias.tex.b32.0 r40.y, (0.056702);
			gather4g.s2en.mode6.base0 (f32)(xyzw)r0.x, r40.x, 1;
		Or this:
			alias.tex.b32.0 r2.y, c1.w;
			isam.s2en.mode6.base0.1d (f32)(xyzw)r46.z, r2.y, 0;
			(sy)stib.f32.2d.4.mode4.base0 r46.z, r2.y, 1;

		Notice the lack of nops between alias and the instruction
		that uses it.

		In this case, the size of the table is bounded (max 16 entries)
		and the number of entries in the table is indicated by
		the first alias instruction in the TABLE_SIZE_MINUS_ONE field.

		For alias.rt: create an entry into the alias table for a render
		target. For example, this would map the 4th component of render
		target 0 (FRAG_RESULT_DATA0) to the constant 0x0:
			alias.rt.b32.0 rt0.w, (0x0)

		alias.rt has to be executed in the preamble and can only map
		const registers and immediates. Additionally, the register
		SP_PS_ALIASED_COMPONENTS has to be set to a mask of render
		target components that will be aliased.

		In this case, the size of the table is only bounded by the
		number of render targets/components and the TABLE_SIZE_MINUS_ONE
		field is not used.
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}.{SCOPE}.{TYPE}{TYPE_SIZE}.{TABLE_SIZE_MINUS_ONE} {DST_HALF}{DST}, {SRC}
	</display>

	<!-- SRC layout depends on SRC_REG_TYPE: 0 = GPR, 1 = CONST, otherwise immediate. -->
	<override>
		<expr>{SRC_REG_TYPE} == 0</expr>
		<field name="SRC" low="0" high="7" type="#alias-gpr-src">
			<param name="TYPE_SIZE"/>
		</field>
		<pattern low="8" high="31">000000000000000000000000</pattern>
	</override>
	<override>
		<expr>{SRC_REG_TYPE} == 1</expr>
		<field name="SRC" low="0" high="10" type="#alias-const-src">
			<param name="TYPE_SIZE"/>
		</field>
		<pattern low="11" high="31">000000000000000000000</pattern>
	</override>

	<field name="SRC" low="0" high="31" type="#alias-immed-src">
		<param name="TYPE_SIZE"/>
		<param name="TYPE"/>
	</field>

	<!-- For scope 1 (rt) the destination is a render target and never printed as half. -->
	<override>
		<expr>{SCOPE} == 1</expr>
		<field low="32" high="36" name="DST" type="#dst-rt"/>
		<pattern low="37" high="39">000</pattern>
		<derived name="DST_HALF" expr="#false" type="bool" display=""/>
	</override>
	<field low="32" high="39" name="DST" type="#reg-gpr"/>
	<field low="40" high="43" name="TABLE_SIZE_MINUS_ONE" type="uint"/>
	<field pos="44" name="SS" type="bool" display="(ss)"/>
	<pattern low="45" high="46">xx</pattern>
	<!-- The 2-bit scope is split across bits 47 (low) and 49 (high); see the SCOPE derived field. -->
	<field pos="47" name="SCOPE_LO" type="uint"/>
	<field pos="48" name="TYPE" type="#alias-type"/>
	<field pos="49" name="SCOPE_HI" type="uint"/>
	<field pos="50" name="TYPE_SIZE" type="#alias-type-size"/>
	<field low="51" high="52" name="SRC_REG_TYPE" type="#alias-src-reg-type"/>
	<pattern low="53" high="54">1x</pattern>
	<pattern low="55" high="58">1000</pattern>  <!-- OPC -->
	<field pos="59" name="JP" type="bool" display="(jp)"/>
	<field pos="60" name="SY" type="bool" display="(sy)"/>
	<pattern low="61" high="63">111</pattern>  <!-- cat7 -->
	<derived name="SCOPE" type="#alias-scope">
		<expr>({SCOPE_HI} &lt;&lt; 1) | {SCOPE_LO}</expr>
	</derived>
	<derived name="DST_HALF" type="bool" display="h">
		<expr>{TYPE_SIZE} == 0</expr>
	</derived>
	<encode>
		<map name="SRC">src->srcs[0]</map>
		<map name="SRC_REG_TYPE">(src->srcs[0]->flags &amp; IR3_REG_CONST) ? 1 : ((src->srcs[0]->flags &amp; IR3_REG_IMMED) ? 2 : 0)</map>
		<map name="TABLE_SIZE_MINUS_ONE">src->cat7.alias_table_size_minus_one</map>
		<map name="TYPE_SIZE">(src->srcs[0]->flags &amp; IR3_REG_HALF) ? 0 : 1</map>
		<map name="SCOPE_LO">src->cat7.alias_scope &amp; 0x1</map>
		<map name="SCOPE_HI">src->cat7.alias_scope >> 1</map>
		<!-- TYPE value 0 displays as "f", so a float alias type encodes as 0. -->
		<map name="TYPE">!src->cat7.alias_type_float</map>
	</encode>
</bitset>

</isa>