<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright © 2020 Google, Inc.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
 -->

<isa>

<!--
	Cat7 Instructions:  barrier, cache, sleep instructions
 -->

<!--
	Base layout for the bitsets below that extend it: the (ss), (jp) and
	(sy) flag bits plus the category bits 61..63 (111).  Bits 0..43 are
	left as don't-care here.
 -->
<bitset name="#instruction-cat7" extends="#instruction">
	<pattern low="0"  high="31">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</pattern>
	<pattern low="32" high="43">xxxxxxxxxxxx</pattern>
	<field   pos="44"          name="SS" type="bool" display="(ss)"/>
	<field   pos="59"          name="JP" type="bool" display="(jp)"/>
	<field   pos="60"          name="SY" type="bool" display="(sy)"/>
	<pattern low="61" high="63">111</pattern>  <!-- cat7 -->
</bitset>

<!--
	Shared layout for "bar" and "fence": four independent flag bits
	(W/R/L/G at bits 51..54), displayed as suffixes in the order G, L, R, W.
 -->
<bitset name="#instruction-cat7-barrier" extends="#instruction-cat7">
	<display>
		{SY}{SS}{JP}{NAME}{G}{L}{R}{W}
	</display>
	<pattern low="45" high="50">x1xxxx</pattern>
	<field   pos="51"          name="W"  type="bool" display=".w"  />  <!-- write -->
	<field   pos="52"          name="R"  type="bool" display=".r"  />  <!-- read -->
	<field   pos="53"          name="L"  type="bool" display=".l"  />  <!-- local -->
	<field   pos="54"          name="G"  type="bool" display=".g"  />  <!-- global -->
	<encode>
		<map name="W">src->cat7.w</map>
		<map name="R">src->cat7.r</map>
		<map name="L">src->cat7.l</map>
		<map name="G">src->cat7.g</map>
	</encode>
</bitset>

<bitset name="bar" extends="#instruction-cat7-barrier">
	<pattern low="55" high="58">0000</pattern>
</bitset>

<bitset name="fence" extends="#instruction-cat7-barrier">
	<pattern low="55" high="58">0001</pattern>
</bitset>

<!-- Suffix selected by the TYPE bit of the #instruction-cat7-data bitsets. -->
<enum name="#dccln-type">
	<value val="0" display=".shr"/>
	<value val="1" display=".all"/>
</enum>

<!--
	Shared layout for dccln/dcinv/dcflu: a single TYPE bit at position 51.
	The encoder currently always emits TYPE=1 (see TODO below).
 -->
<bitset name="#instruction-cat7-data" extends="#instruction-cat7">
	<display>
		{SY}{SS}{JP}{NAME}{TYPE}
	</display>
	<pattern low="45" high="50">xxxxxx</pattern>
	<field   pos="51"          name="TYPE" type="#dccln-type"/>
	<pattern low="52" high="54">xx0</pattern>
	<encode>
		<!-- TODO: read handle type -->
		<map name="TYPE">1</map>
	</encode>
</bitset>

<enum name="#sleep-duration">
	<value val="0" display=".s">
		<doc>
			Short sleep
		</doc>
	</value>
	<value val="1" display=".l">
		<doc>
			Long sleep, around 20x longer than short
		</doc>
	</value>
</enum>

<bitset name="sleep" extends="#instruction-cat7">
	<doc>
		Short/Long Sleep
		TODO: how is it different from a bunch of nops?
	</doc>
	<display>
		{SY}{SS}{JP}{NAME}{DURATION}
	</display>
	<pattern low="45" high="50">xxxxxx</pattern>
	<field   pos="51"          name="DURATION" type="#sleep-duration"/>
	<pattern low="52" high="54">xxx</pattern>
	<pattern low="55" high="58">0010</pattern>
	<encode>
		<!-- TODO: read duration -->
		<map name="DURATION">1</map>
	</encode>
</bitset>

<bitset name="icinv" extends="#instruction-cat7">
	<doc>
		Seems to be Instruction Cache Invalidate, supported by the fact
		that it considerably slows shader execution compared to
		data cache instructions.
	</doc>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">xxxxxxxxxx</pattern>
	<pattern low="55" high="58">0011</pattern>
</bitset>

<bitset name="dccln" extends="#instruction-cat7-data">
	<doc>
		Data (Cache?) Clean
	</doc>
	<pattern low="55" high="58">0100</pattern>
</bitset>

<bitset name="dcinv" extends="#instruction-cat7-data">
	<doc>
		Data (Cache?) Invalidate
	</doc>
	<pattern low="55" high="58">0101</pattern>
</bitset>

<bitset name="dcflu" extends="#instruction-cat7-data">
	<doc>
		Data (Cache?) Flush
	</doc>
	<pattern low="55" high="58">0110</pattern>
</bitset>

<!--
	NOTE: ccinv uses the same bits 55..58 value (0101) as dcinv; the two
	differ in bits 45..54 (dcinv requires bit 54 = 0, ccinv fixes 51..54
	to 1010) and ccinv is restricted to gen 700 and up.
 -->
<bitset name="ccinv" extends="#instruction-cat7">
	<doc>
		_Presumably_ invalidates workgroup-wide cache for image/buffer data access.
		So while "fence" is enough to synchronize data access inside a workgroup,
		for cross-workgroup synchronization we have to invalidate that cache.
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="50">x1xxxx</pattern>
	<pattern low="51" high="54">1010</pattern>
	<pattern low="55" high="58">0101</pattern>
</bitset>

<bitset name="lock" extends="#instruction-cat7">
	<doc>
		Are met at the end of compute shader:
			(sy)(jp)lock;
			unlock;
			end ;
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">1000010000</pattern>
	<pattern low="55" high="58">0111</pattern>
</bitset>

<!-- Shares bits 55..58 (0111) with "lock"; only the 45..54 pattern differs. -->
<bitset name="unlock" extends="#instruction-cat7">
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">1001010000</pattern>
	<pattern low="55" high="58">0111</pattern>
</bitset>

<!--
	Immediate source of "alias".  SRC_TYPE (passed in as a parameter)
	selects the display form: 16-bit float as h(...), 32-bit float as
	(...), otherwise a plain unsigned integer.
 -->
<bitset name="#alias-immed-src" size="32">
	<override>
		<expr>
			{SRC_TYPE} == 0 /* b16 */
		</expr>
		<display>
			h({IMMED})
		</display>
		<field name="IMMED" low="0" high="15" type="float"/>
	</override>
	<override>
		<expr>
			{SRC_TYPE} == 1 /* b32 */
		</expr>
		<display>
			({IMMED})
		</display>
		<field name="IMMED" low="0" high="31" type="float"/>
	</override>

	<display>
		{IMMED}
	</display>

	<field name="IMMED" low="0" high="31" type="uint"/>
	<encode type="struct ir3_register *">
		<map name="IMMED">extract_reg_uim(src)</map>
	</encode>
</bitset>

<!-- Const-register source of "alias"; prefixed with "h" when SRC_TYPE is b16. -->
<bitset name="#alias-const-src" size="11">
	<display>
		{HALF}{CONST}
	</display>
	<field name="CONST" low="0" high="10" type="#reg-const"/>
	<derived name="HALF" type="bool" display="h">
		<expr>
			({SRC_TYPE} == 0) /* b16 */
		</expr>
	</derived>
	<encode type="struct ir3_register *">
		<map name="CONST">src</map>
	</encode>
</bitset>

<!-- GPR source of "alias"; prefixed with "h" when SRC_TYPE is b16. -->
<bitset name="#alias-gpr-src" size="8">
	<display>
		{HALF}{SRC}
	</display>
	<field name="SRC" low="0" high="7" type="#reg-gpr"/>
	<derived name="HALF" type="bool" display="h">
		<expr>
			({SRC_TYPE} == 0) /* b16 */
		</expr>
	</derived>
	<encode type="struct ir3_register *">
		<map name="SRC">src</map>
	</encode>
</bitset>

<enum name="#alias-scope">
	<doc>
		TODO: Yes, something is wrong here, needs to be tested.
	</doc>
	<value val="0" display="tex"/>
	<value val="2" display="tex"/>
	<value val="3" display="rt"/>
	<value val="4" display="mem"/>
</enum>

<enum name="#alias-src-type">
	<doc>
		These types are clearly used by the blob.
		However, it seems that there may be f32/f16/i32/i16
		types but they are intertwined with the _scope_ bitfield.
		TODO: Check if it does matter.
	</doc>
	<value val="0" display="b16"/>
	<value val="1" display="b32"/>
</enum>

<!-- Selects which of the #alias-*-src bitsets decodes the SRC field of "alias". -->
<enum name="#alias-src-reg-type">
	<value val="0" display="GPR"/>
	<value val="1" display="CONST"/>
	<value val="2" display="IMMED"/>
</enum>

<!--
	NOTE: "alias" extends #instruction directly rather than
	#instruction-cat7 and therefore re-declares SS/JP/SY and the cat7
	bits itself; bits 0..43 carry SRC/DST/UNK here.
 -->
<bitset name="alias" extends="#instruction">
	<doc>
		Add an entry to the scope-specific "alias table", when instruction
		from that scope tries to access a source register it would search
		its alias table first.

		This allows to reduce the amount of data passed around when reading
		immediates/constants and reduce register pressure. In addition,
		the alias table could be populated in the preamble further reducing
		the amount of instructions being run.

		Used like this:
			alias.tex.b32.1 r40.x, (-1.456763);
			alias.tex.b32.0 r40.y, (0.056702);
			gather4g.s2en.mode6.base0 (f32)(xyzw)r0.x, r40.x, 1;
		Or this:
			alias.tex.b32.0 r2.y, c1.w;
			isam.s2en.mode6.base0.1d (f32)(xyzw)r46.z, r2.y, 0;
			(sy)stib.f32.2d.4.mode4.base0 r46.z, r2.y, 1;

		Notice the lack of nops between alias and the instruction
		that uses it.
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}.{SCOPE}.{SRC_TYPE}.{UNK} {DST}, {SRC}
	</display>

	<!-- GPR source: only bits 0..7 are used, the rest must be zero. -->
	<override>
		<expr>{SRC_REG_TYPE} == 0</expr>
		<field name="SRC" low="0" high="7" type="#alias-gpr-src">
			<param name="SRC_TYPE"/>
		</field>
		<pattern low="8" high="31">000000000000000000000000</pattern>
	</override>
	<!-- Const source: only bits 0..10 are used, the rest must be zero. -->
	<override>
		<expr>{SRC_REG_TYPE} == 1</expr>
		<field name="SRC" low="0" high="10" type="#alias-const-src">
			<param name="SRC_TYPE"/>
		</field>
		<pattern low="11" high="31">000000000000000000000</pattern>
	</override>

	<!-- Default (no override matched): full 32-bit immediate source. -->
	<field name="SRC" low="0" high="31" type="#alias-immed-src">
		<param name="SRC_TYPE"/>
	</field>
	<field   low="32" high="39" name="DST" type="#reg-gpr"/>
	<field   low="40" high="43" name="UNK" type="uint"/>
	<field   pos="44"          name="SS" type="bool" display="(ss)"/>
	<pattern low="45" high="46">xx</pattern>
	<field   low="47" high="49" name="SCOPE" type="#alias-scope"/>
	<field   low="50" high="50" name="SRC_TYPE" type="#alias-src-type"/>
	<field   low="51" high="52" name="SRC_REG_TYPE" type="#alias-src-reg-type"/>
	<pattern low="53" high="54">1x</pattern>
	<pattern low="55" high="58">1000</pattern> <!-- OPC -->
	<field   pos="59"          name="JP" type="bool" display="(jp)"/>
	<field   pos="60"          name="SY" type="bool" display="(sy)"/>
	<pattern low="61" high="63">111</pattern>  <!-- cat7 -->
	<encode>
		<map name="SRC">src->srcs[0]</map>
		<!-- "&" must be written as an entity inside XML character data. -->
		<map name="SRC_REG_TYPE">(src->srcs[0]->flags &amp; IR3_REG_CONST) ? 1 : ((src->srcs[0]->flags &amp; IR3_REG_IMMED) ? 2 : 0)</map>
		<map name="SRC_TYPE">1</map> <!-- TODO -->
		<map name="UNK">extract_reg_uim(src->srcs[1])</map>
		<map name="SCOPE">src->cat7.alias_scope</map>
	</encode>
</bitset>

</isa>