• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <string.h>
25 #include "util/macros.h"
26 #include "util/bitscan.h"
27 
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
30 
#ifndef QPU_MASK
/* Builds a 64-bit mask covering bits [high:low], inclusive. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts value into position for the named field, asserting that the value
 * fits entirely within the field's mask.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts the named field from a packed instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Returns inst with the named field replaced by a freshly packed value. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46 
/* Bit-field layout of the 64-bit QPU instruction word.  Each field has a
 * _SHIFT for its low bit and a _MASK built with QPU_MASK(high, low).  Note
 * that several branch-instruction fields overlap the ALU-instruction ones.
 */
#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
106 
/* Shorthand designated initializers used to keep the signal map tables
 * below readable: each expands to setting one boolean of struct
 * v3d_qpu_sig.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
/* Map from the packed 5-bit signal field to the set of signals it raises,
 * for V3D 3.3.  Entries left empty at a nonzero index are reserved
 * encodings (see v3d_qpu_sig_unpack).
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
154 
/* Signal map for V3D 4.0: drops the LDVPM encodings of 3.3 and adds
 * WRTMUC.  Gaps are reserved encodings.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
183 
/* Signal map for V3D 4.1+: adds LDUNIFRF/LDUNIFA/LDUNIFARF encodings on
 * top of the 4.0 set.  Gaps are reserved encodings.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM,            LDTMU, },
};
215 
216 bool
v3d_qpu_sig_unpack(const struct v3d_device_info * devinfo,uint32_t packed_sig,struct v3d_qpu_sig * sig)217 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
218                    uint32_t packed_sig,
219                    struct v3d_qpu_sig *sig)
220 {
221         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
222                 return false;
223 
224         if (devinfo->ver >= 41)
225                 *sig = v41_sig_map[packed_sig];
226         else if (devinfo->ver == 40)
227                 *sig = v40_sig_map[packed_sig];
228         else
229                 *sig = v33_sig_map[packed_sig];
230 
231         /* Signals with zeroed unpacked contents after element 0 are reserved. */
232         return (packed_sig == 0 ||
233                 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
234 }
235 
236 bool
v3d_qpu_sig_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig,uint32_t * packed_sig)237 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
238                  const struct v3d_qpu_sig *sig,
239                  uint32_t *packed_sig)
240 {
241         static const struct v3d_qpu_sig *map;
242 
243         if (devinfo->ver >= 41)
244                 map = v41_sig_map;
245         else if (devinfo->ver == 40)
246                 map = v40_sig_map;
247         else
248                 map = v33_sig_map;
249 
250         for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
251                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
252                         *packed_sig = i;
253                         return true;
254                 }
255         }
256 
257         return false;
258 }
/* Reinterprets a float's bit pattern as an unsigned integer.  The type pun
 * goes through a union, which is well-defined in C.
 */
static inline unsigned
fui(float f)
{
        union {
                float f;
                unsigned ui;
        } bits = { .f = f };

        return bits.ui;
}
266 
/* The 48 encodable "small immediate" values, indexed by their packed
 * encoding: 0..15, then -16..-1 (negatives wrap to their two's-complement
 * uint32_t representation), then float bit patterns for powers of two from
 * 2^-8 through 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
293 
294 bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info * devinfo,uint32_t packed_small_immediate,uint32_t * small_immediate)295 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
296                          uint32_t packed_small_immediate,
297                          uint32_t *small_immediate)
298 {
299         if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
300                 return false;
301 
302         *small_immediate = small_immediates[packed_small_immediate];
303         return true;
304 }
305 
306 bool
v3d_qpu_small_imm_pack(const struct v3d_device_info * devinfo,uint32_t value,uint32_t * packed_small_immediate)307 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
308                        uint32_t value,
309                        uint32_t *packed_small_immediate)
310 {
311         STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
312 
313         for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
314                 if (small_immediates[i] == value) {
315                         *packed_small_immediate = i;
316                         return true;
317                 }
318         }
319 
320         return false;
321 }
322 
/* Decodes the 7-bit packed condition field into the six flag sub-fields of
 * struct v3d_qpu_flags.  The encoding is range-based: which sub-fields are
 * present is determined by which numeric band packed_cond falls in.
 * Returns false only for the reserved encoding 0x10.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* 2-bit condition selector used by the high encoding band. */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "nothing set" and fill in whatever the encoding
         * selects.
         */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 0x01-0x03: apf in the low 2 bits */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 0x04-0x0f: auf, biased by 4 relative to UF_ANDZ */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* reserved encoding */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mpf in the low 2 bits */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: muf, same bias as auf */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x20-0x2f: ac in bits 3:2, mpf in bits 1:0 */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x30-0x3f: mc in bits 3:2, apf in bits 1:0 */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40+: mc in bits 5:4, plus either ac (when bits 3:2 are
                 * zero) or auf in the low bits.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
371 
/* Packs the six flag sub-fields of *cond back into the 7-bit condition
 * field.  Only certain combinations of sub-fields are encodable; returns
 * false for any combination not listed in flags_table.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* One bit per flag sub-field that is set in *cond. */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* For each encodable combination of sub-fields, the fixed high bits
         * of the packed encoding; the sub-field values are OR'd in below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Collect which of the six sub-fields are in use. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* apf/mpf land directly in the low bits (they are zero when
                 * unused, so OR'ing both is harmless).
                 */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* auf/muf are biased by 4, mirroring the unpack side. */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        /* mc's position depends on which encoding family
                         * was chosen above (bit 6 set or not).
                         */
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
445 
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */

/* Bitmask with mux-select values bot..top (inclusive) set.  Both arguments
 * are fully parenthesized — the previous definition expanded `top` bare in
 * `(top + 1)`, which would mis-evaluate expression arguments containing
 * lower-precedence operators.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
452 
/* One row of the opcode decode tables (add_ops/mul_ops below). */
struct opcode_desc {
        uint8_t opcode_first; /* first opcode field value covered (inclusive) */
        uint8_t opcode_last;  /* last opcode field value covered (inclusive) */
        uint8_t mux_b_mask;   /* bitmask of mux_b values this entry accepts */
        uint8_t mux_a_mask;   /* bitmask of mux_a values this entry accepts */
        uint8_t op;           /* decoded op enum value (V3D_QPU_A_* / V3D_QPU_M_*) */

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
469 
/* Decode table for the add-ALU opcode field.  Entries with overlapping
 * opcode ranges (FADD/FADDNF, FMIN/FMAX, the stvpms) are disambiguated
 * after lookup by operand order or waddr — see v3d_qpu_add_unpack.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: 1-arg ops selected by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: 0-arg ops selected by mux_b then mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
573 
/* Decode table for the mul-ALU opcode field. */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
587 
588 /* Returns true if op_desc should be filtered out based on devinfo->ver
589  * against op_desc->first_ver and op_desc->last_ver. Check notes about
590  * first_ver/last_ver on struct opcode_desc comments.
591  */
592 static bool
opcode_invalid_in_version(const struct v3d_device_info * devinfo,const struct opcode_desc * op_desc)593 opcode_invalid_in_version(const struct v3d_device_info *devinfo,
594                           const struct opcode_desc *op_desc)
595 {
596         return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
597                 (op_desc->last_ver != 0  && devinfo->ver > op_desc->last_ver);
598 }
599 
600 static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint32_t opcode,uint32_t mux_a,uint32_t mux_b)601 lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
602                           const struct opcode_desc *opcodes,
603                           size_t num_opcodes, uint32_t opcode,
604                           uint32_t mux_a, uint32_t mux_b)
605 {
606         for (int i = 0; i < num_opcodes; i++) {
607                 const struct opcode_desc *op_desc = &opcodes[i];
608 
609                 if (opcode < op_desc->opcode_first ||
610                     opcode > op_desc->opcode_last)
611                         continue;
612 
613                 if (opcode_invalid_in_version(devinfo, op_desc))
614                         continue;
615 
616                 if (!(op_desc->mux_b_mask & (1 << mux_b)))
617                         continue;
618 
619                 if (!(op_desc->mux_a_mask & (1 << mux_a)))
620                         continue;
621 
622                 return op_desc;
623         }
624 
625         return NULL;
626 }
627 
628 static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)629 v3d_qpu_float32_unpack_unpack(uint32_t packed,
630                               enum v3d_qpu_input_unpack *unpacked)
631 {
632         switch (packed) {
633         case 0:
634                 *unpacked = V3D_QPU_UNPACK_ABS;
635                 return true;
636         case 1:
637                 *unpacked = V3D_QPU_UNPACK_NONE;
638                 return true;
639         case 2:
640                 *unpacked = V3D_QPU_UNPACK_L;
641                 return true;
642         case 3:
643                 *unpacked = V3D_QPU_UNPACK_H;
644                 return true;
645         default:
646                 return false;
647         }
648 }
649 
650 static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)651 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
652                             uint32_t *packed)
653 {
654         switch (unpacked) {
655         case V3D_QPU_UNPACK_ABS:
656                 *packed = 0;
657                 return true;
658         case V3D_QPU_UNPACK_NONE:
659                 *packed = 1;
660                 return true;
661         case V3D_QPU_UNPACK_L:
662                 *packed = 2;
663                 return true;
664         case V3D_QPU_UNPACK_H:
665                 *packed = 3;
666                 return true;
667         default:
668                 return false;
669         }
670 }
671 
672 static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)673 v3d_qpu_float16_unpack_unpack(uint32_t packed,
674                               enum v3d_qpu_input_unpack *unpacked)
675 {
676         switch (packed) {
677         case 0:
678                 *unpacked = V3D_QPU_UNPACK_NONE;
679                 return true;
680         case 1:
681                 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
682                 return true;
683         case 2:
684                 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
685                 return true;
686         case 3:
687                 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
688                 return true;
689         case 4:
690                 *unpacked = V3D_QPU_UNPACK_SWAP_16;
691                 return true;
692         default:
693                 return false;
694         }
695 }
696 
697 static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)698 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
699                             uint32_t *packed)
700 {
701         switch (unpacked) {
702         case V3D_QPU_UNPACK_NONE:
703                 *packed = 0;
704                 return true;
705         case V3D_QPU_UNPACK_REPLICATE_32F_16:
706                 *packed = 1;
707                 return true;
708         case V3D_QPU_UNPACK_REPLICATE_L_16:
709                 *packed = 2;
710                 return true;
711         case V3D_QPU_UNPACK_REPLICATE_H_16:
712                 *packed = 3;
713                 return true;
714         case V3D_QPU_UNPACK_SWAP_16:
715                 *packed = 4;
716                 return true;
717         default:
718                 return false;
719         }
720 }
721 
/* Encodes an output-pack mode into its 2-bit field value.
 *
 * NOTE(review): the parameter is declared as enum v3d_qpu_input_unpack but
 * every case label is a V3D_QPU_PACK_* (output-pack) value — presumably the
 * two enums share compatible numeric values here; confirm against
 * qpu_instr.h whether the parameter should be enum v3d_qpu_output_pack.
 */
static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}
740 
/*
 * Decodes the add-ALU half of a packed 64-bit QPU instruction into
 * instr->alu.add.
 *
 * Returns false if the opcode/mux combination isn't a valid add
 * operation for this device version.
 */
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                          map_op, mux_a, mux_b);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                /* The three STVPM variants share an encoding and are
                 * selected by the write address.
                 */
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Decode per-op packing/unpacking modifiers carried in the
         * low opcode bits (and, for the unary float ops, in mux_b).
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                /* Output pack lives in op bits 5:4, except VFPACK has
                 * no output pack.
                 */
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                /* Input unpacks: a in op bits 3:2, b in op bits 1:0. */
                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* Unary float ops have no b operand, so its mux field
                 * carries the output pack instead.
                 */
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-integer conversions never pack their output. */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* Vector float16 ops use a 3-bit float16 unpack on a. */
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                /* For the LDVPM*_IN ops the MA bit selects the _OUT
                 * variant rather than a magic write.
                 */
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
895 
/*
 * Decodes the mul-ALU half of a packed 64-bit QPU instruction into
 * instr->alu.mul.
 *
 * Returns false if the opcode/mux combination isn't a valid mul
 * operation for this device version.
 */
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo, mul_ops,
                                                  ARRAY_SIZE(mul_ops),
                                                  op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        /* Decode per-op packing/unpacking modifiers from the low opcode
         * bits (and mux_b for FMOV, which has no real b operand).
         */
        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* Output pack is stored biased by 1 in op bits 5:4. */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* Output pack is split: high bit in op bit 0, low bit in
                 * mux_b bit 2; the a unpack sits in mux_b bits 1:0.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* The float16 unpack is stored biased by 4 (mod 8) in
                 * op bits 2:0.
                 */
                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
968 
969 static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint8_t op)970 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
971                          const struct opcode_desc *opcodes, size_t num_opcodes,
972                          uint8_t op)
973 {
974         for (int i = 0; i < num_opcodes; i++) {
975                 const struct opcode_desc *op_desc = &opcodes[i];
976 
977                 if (op_desc->op != op)
978                         continue;
979 
980                 if (opcode_invalid_in_version(devinfo, op_desc))
981                         continue;
982 
983                 return op_desc;
984         }
985 
986         return NULL;
987 }
988 
/*
 * Packs the add-ALU half of an instruction (opcode, muxes, write
 * address, pack/unpack modifiers) into *packed_instr.
 *
 * Returns false if the operation or its modifiers can't be encoded.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        /* The STVPM variants share an encoding and are selected by the
         * write address, so force it here and suppress magic_write.
         */
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        /* The _OUT variants are encoded as the _IN opcode plus the MA
         * bit (mirroring v3d_qpu_add_unpack).
         */
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK can't encode ABS on either input. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Replace (rather than OR) the unpack bits, since
                 * opcode_first may have them set.
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Unary float ops carry the output pack in mux_b. */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Encoding 0 isn't valid here (it would collide with a
                 * different opcode).
                 */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* NOTE: this declaration's scope extends to the end of
                 * the switch, so the VFMIN/VFMAX case below reuses it.
                 */
                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Everything else (except NOP) must have no modifiers. */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
1201 
/*
 * Packs the mul-ALU half of an instruction (opcode, muxes, write
 * address, pack/unpack modifiers) into *packed_instr.
 *
 * Returns false if the operation or its modifiers can't be encoded.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* Output pack is split across opcode bit 0 (high bit)
                 * and mux_b bit 2 (low bit); a_unpack goes in mux_b
                 * bits 1:0 (mirrors v3d_qpu_mul_unpack).
                 */
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* The unpack is stored biased by 4 (mod 8), with
                 * SWAP_16 using the dedicated opcode 8 (inverse of the
                 * decode in v3d_qpu_mul_unpack).
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
1308 
/*
 * Unpacks a 64-bit instruction already identified as an ALU
 * instruction: signal, condition/flags, read addresses, then the add
 * and mul halves.
 */
static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                /* When the signal writes a register, the cond field
                 * carries the signal's destination address and magic
                 * bit instead of conditions/flags, which must all be
                 * NONE.
                 */
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}
1348 
/*
 * Unpacks a 64-bit instruction already identified as a branch:
 * condition, MSF-ignore mode, destination kinds (bdi/bdu), and the
 * branch offset.
 */
static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        /* Condition encoding: 0 is ALWAYS; 2 and up map onto
         * V3D_QPU_BRANCH_COND_A0..ALLNA.  Anything else is invalid.
         */
        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        /* msfign value 3 is rejected as invalid. */
        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        /* bdu is only meaningful when the UB (uniform branch) bit is
         * set.
         */
        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        /* The offset is split into a low field (shifted left 3) and a
         * high field (shifted left 24).
         */
        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}
1393 
1394 bool
v3d_qpu_instr_unpack(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1395 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1396                      uint64_t packed_instr,
1397                      struct v3d_qpu_instr *instr)
1398 {
1399         if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
1400                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1401         } else {
1402                 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
1403 
1404                 if ((sig & 24) == 16) {
1405                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1406                                                            instr);
1407                 } else {
1408                         return false;
1409                 }
1410         }
1411 }
1412 
/*
 * Packs an ALU (or NOP-typed) instruction: signal, read addresses, the
 * add and mul halves, then condition/flags or the signal's destination
 * address.
 */
static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        /* The cond field is repurposed for the signal's
                         * destination, so no conditions/flags may be
                         * set (mirrors v3d_qpu_instr_unpack_alu).
                         */
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                /* Non-ALU types can't carry an address-writing signal. */
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}
1459 
/*
 * Packs a branch instruction: the branch signal, condition, MSF-ignore
 * mode, destination kinds, offset fields, and (for register-file
 * destinations) raddr_a.
 */
static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        /* Signal value 16 marks a branch (matches the sig & 24 == 16
         * test in v3d_qpu_instr_unpack).
         */
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        /* Condition 0 means ALWAYS; other conditions are stored with a
         * bias of 2 above V3D_QPU_BRANCH_COND_A0.
         */
        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                /* NOTE(review): MSFIGN was already ORed in above; this
                 * second OR of the same value is redundant but harmless
                 * since it sets identical bits.
                 */
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                /* The offset is split into a low field (bits below 24,
                 * stored >> 3) and a high field (bits 24+).
                 */
                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}
1510 
1511 bool
v3d_qpu_instr_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1512 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1513                    const struct v3d_qpu_instr *instr,
1514                    uint64_t *packed_instr)
1515 {
1516         *packed_instr = 0;
1517 
1518         switch (instr->type) {
1519         case V3D_QPU_INSTR_TYPE_ALU:
1520                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1521         case V3D_QPU_INSTR_TYPE_BRANCH:
1522                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1523         default:
1524                 return false;
1525         }
1526 }
1527