• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3
4Permission is hereby granted, free of charge, to any person obtaining a
5copy of this software and associated documentation files (the "Software"),
6to deal in the Software without restriction, including without limitation
7the rights to use, copy, modify, merge, publish, distribute, sublicense,
8and/or sell copies of the Software, and to permit persons to whom the
9Software is furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice (including the next
12paragraph) shall be included in all copies or substantial portions of the
13Software.
14
15THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21SOFTWARE.
22"""
23
24opcodes = {}
25immediates = {}
26enums = {}
27
28class Opcode(object):
29   def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
30      self.name = name
31      self.dests = dests
32      self.srcs = srcs
33      self.imms = imms
34      self.is_float = is_float
35      self.can_eliminate = can_eliminate
36      self.encoding_16 = encoding_16
37      self.encoding_32 = encoding_32
38
39class Immediate(object):
40   def __init__(self, name, ctype):
41      self.name = name
42      self.ctype = ctype
43
44class Encoding(object):
45   def __init__(self, description):
46      (exact, mask, length_short, length_long) = description
47
48      # Convenience
49      if length_long is None:
50         length_long = length_short
51
52      self.exact = exact
53      self.mask = mask
54      self.length_short = length_short
55      self.extensible = length_short != length_long
56
57      if self.extensible:
58         assert(length_long == length_short + (4 if length_short > 8 else 2))
59
60def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, can_eliminate = True, encoding_16 = None):
61   encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
62   encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None
63
64   opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32)
65
66def immediate(name, ctype = "uint32_t"):
67   imm = Immediate(name, ctype)
68   immediates[name] = imm
69   return imm
70
71def enum(name, value_dict):
72   enums[name] = value_dict
73   return immediate(name, "enum agx_" + name)
74
75L = (1 << 15)
76_ = None
77
78FORMAT = immediate("format", "enum agx_format")
79IMM = immediate("imm")
80WRITEOUT = immediate("writeout")
81INDEX = immediate("index")
82COMPONENT = immediate("component")
83CHANNELS = immediate("channels")
84TRUTH_TABLE = immediate("truth_table")
85ROUND = immediate("round", "enum agx_round")
86SHIFT = immediate("shift")
87MASK = immediate("mask")
88BFI_MASK = immediate("bfi_mask")
89LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
90DIM = immediate("dim", "enum agx_dim")
91SCOREBOARD = immediate("scoreboard")
92ICOND = immediate("icond", "enum agx_icond")
93FCOND = immediate("fcond", "enum agx_fcond")
94NEST = immediate("nest")
95INVERT_COND = immediate("invert_cond")
96NEST = immediate("nest")
97TARGET = immediate("target", "agx_block *")
98PERSPECTIVE = immediate("perspective", "bool")
99SR = enum("sr", {
100   0:  'threadgroup_position_in_grid.x',
101   1:  'threadgroup_position_in_grid.y',
102   2:  'threadgroup_position_in_grid.z',
103   4:  'threads_per_threadgroup.x',
104   5:  'threads_per_threadgroup.y',
105   6:  'threads_per_threadgroup.z',
106   8:  'dispatch_threads_per_threadgroup.x',
107   9:  'dispatch_threads_per_threadgroup.y',
108   10: 'dispatch_threads_per_threadgroup.z',
109   48: 'thread_position_in_threadgroup.x',
110   49: 'thread_position_in_threadgroup.y',
111   50: 'thread_position_in_threadgroup.z',
112   51: 'thread_index_in_threadgroup',
113   52: 'thread_index_in_subgroup',
114   53: 'subgroup_index_in_threadgroup',
115   56: 'active_thread_index_in_quad',
116   58: 'active_thread_index_in_subgroup',
117   62: 'backfacing',
118   80: 'thread_position_in_grid.x',
119   81: 'thread_position_in_grid.y',
120   82: 'thread_position_in_grid.z',
121})
122
123FUNOP = lambda x: (x << 28)
124FUNOP_MASK = FUNOP((1 << 14) - 1)
125
126def funop(name, opcode):
127   op(name, (0x0A | L | (opcode << 28),
128      0x3F | L | (((1 << 14) - 1) << 28), 6, _),
129      srcs = 1, is_float = True)
130
131# Listing of opcodes
132funop("floor",     0b000000)
133funop("srsqrt",    0b000001)
134funop("dfdx",      0b000100)
135funop("dfdy",      0b000110)
136funop("rcp",       0b001000)
137funop("rsqrt",     0b001001)
138funop("sin_pt_1",  0b001010)
139funop("log2",      0b001100)
140funop("exp2",      0b001101)
141funop("sin_pt_2",  0b001110)
142funop("ceil",      0b010000)
143funop("trunc",     0b100000)
144funop("roundeven", 0b110000)
145
146op("fadd",
147      encoding_16 = (0x26 | L, 0x3F | L, 6, _),
148      encoding_32 = (0x2A | L, 0x3F | L, 6, _),
149      srcs = 2, is_float = True)
150
151op("fma",
152      encoding_16 = (0x36, 0x3F, 6, 8),
153      encoding_32 = (0x3A, 0x3F, 6, 8),
154      srcs = 3, is_float = True)
155
156op("fmul",
157      encoding_16 = ((0x16 | L), (0x3F | L), 6, _),
158      encoding_32 = ((0x1A | L), (0x3F | L), 6, _),
159      srcs = 2, is_float = True)
160
161op("mov_imm",
162      encoding_32 = (0x62, 0xFF, 6, 8),
163      encoding_16 = (0x62, 0xFF, 4, 6),
164      imms = [IMM])
165
166op("iadd",
167      encoding_32 = (0x0E, 0x3F | L, 8, _),
168      srcs = 2, imms = [SHIFT])
169
170op("imad",
171      encoding_32 = (0x1E, 0x3F | L, 8, _),
172      srcs = 3, imms = [SHIFT])
173
174op("bfi",
175      encoding_32 = (0x2E, 0x7F | (0x3 << 26), 8, _),
176      srcs = 3, imms = [BFI_MASK])
177
178op("bfeil",
179      encoding_32 = (0x2E | L, 0x7F | L | (0x3 << 26), 8, _),
180      srcs = 3, imms = [BFI_MASK])
181
182op("asr",
183      encoding_32 = (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _),
184      srcs = 2)
185
186op("icmpsel",
187      encoding_32 = (0x12, 0x7F, 8, 10),
188      srcs = 4, imms = [ICOND])
189
190op("fcmpsel",
191      encoding_32 = (0x02, 0x7F, 8, 10),
192      srcs = 4, imms = [FCOND])
193
194# sources are coordinates, LOD, texture, sampler, offset
195# TODO: anything else?
196op("texture_sample",
197      encoding_32 = (0x32, 0x7F, 8, 10), # XXX WRONG SIZE
198      srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])
199
200# sources are base, index
201op("device_load",
202      encoding_32 = (0x05, 0x7F, 6, 8),
203      srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])
204
205op("wait", (0x38, 0xFF, 2, _), dests = 0,
206      can_eliminate = False, imms = [SCOREBOARD])
207
208op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR])
209
210op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1, can_eliminate = False)
211
212# Essentially same encoding
213op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0,
214      can_eliminate = False, imms = [FORMAT])
215
216op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1,
217      can_eliminate = False, imms = [FORMAT])
218
219for (name, exact) in [("any", 0xC000), ("none", 0xC200)]:
220   op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,
221         can_eliminate = False, imms = [TARGET])
222
223# TODO: model implicit r0l destinations
224op("pop_exec", (0x52 | (0x3 << 9), ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11), 6, _),
225      dests = 0, srcs = 0, can_eliminate = False, imms = [NEST])
226
227for is_float in [False, True]:
228   mod_mask = 0 if is_float else (0x3 << 26) | (0x3 << 38)
229
230   for (cf, cf_op) in [("if", 0), ("else", 1), ("while", 2)]:
231      name = "{}_{}cmp".format(cf, "f" if is_float else "i")
232      exact = 0x42 | (0x0 if is_float else 0x10) | (cf_op << 9)
233      mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44)
234      imms = [NEST, FCOND if is_float else ICOND, INVERT_COND]
235
236      op(name, (exact, mask, 6, _), dests = 0, srcs = 2, can_eliminate = False,
237            imms = imms, is_float = is_float)
238
239op("bitop", (0x7E, 0x7F, 6, _), srcs = 2, imms = [TRUTH_TABLE])
240op("convert", (0x3E | L, 0x7F | L | (0x3 << 38), 6, _), srcs = 2, imms = [ROUND])
241op("ld_vary", (0x21, 0xBF, 8, _), srcs = 1, imms = [CHANNELS, PERSPECTIVE])
242op("ld_vary_flat", (0xA1, 0xBF, 8, _), srcs = 1, imms = [CHANNELS])
243op("st_vary", None, dests = 0, srcs = 2, can_eliminate = False)
244op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
245op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
246op("writeout", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)
247
248# Convenient aliases.
249op("mov", _, srcs = 1)
250op("not", _, srcs = 1)
251op("xor", _, srcs = 2)
252op("and", _, srcs = 2)
253op("or", _, srcs = 2)
254
255# Indicates the logical end of the block, before final branches/control flow
256op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False)
257
258op("p_combine", _, srcs = 4)
259op("p_split", _, srcs = 1, dests = 4)
260op("p_extract", _, srcs = 1, imms = [COMPONENT])
261
262# Phis are special-cased in the IR as they (uniquely) can take an unbounded
263# number of source.
264op("phi", _, srcs = 0)
265