/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"
#include "util/bitscan.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
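
/* Usage sketch (illustrative, not part of the original file): packing and
 * unpacking a field with these macros, using the V3D_QPU_SIG field defined
 * below (bits 57:53, so values up to 0x1f fit).
 *
 *   uint64_t inst = 0;
 *   inst |= QPU_SET_FIELD(0x1f, V3D_QPU_SIG);        // sets bits 57:53
 *   assert(QPU_GET_FIELD(inst, V3D_QPU_SIG) == 0x1f);
 *   inst = QPU_UPDATE_FIELD(inst, 0x3, V3D_QPU_SIG); // replaces the field
 *
 * QPU_SET_FIELD asserts that the value fits within the field's mask.
 */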

#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_RADDR_C_SHIFT               18
#define V3D_QPU_RADDR_C_MASK                QPU_MASK(23, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_D_SHIFT               12
#define V3D_QPU_RADDR_D_MASK                QPU_MASK(17, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
#define SMIMM_A .small_imm_a = true
#define SMIMM_B .small_imm_b = true
#define SMIMM_C .small_imm_c = true
#define SMIMM_D .small_imm_d = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM_B, LDVARY,              },
        [15] = { SMIMM_B,                      },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM_B, LDVPM,               },
        [31] = { SMIMM_B,                      },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM_B, LDVARY,              },
        [15] = { SMIMM_B,                      },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM_B,       LDTMU,         },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM_B,    LDVARY       },
        [15] = { SMIMM_B,                 },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM_B,          LDTMU, },
};


static const struct v3d_qpu_sig v71_sig_map[] = {
        /*      MISC       phys    RF0 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM_A,                 },
        [15] = { SMIMM_B,                 },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        /* 23 reserved */
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-29 reserved */
        [30] = { SMIMM_C,                 },
        [31] = { SMIMM_D,                 },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 71)
                *sig = v71_sig_map[packed_sig];
        else if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are reserved. */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}
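
/* Usage sketch (illustrative, not part of the original file): round-tripping
 * a packed signal through the unpack/pack helpers. Assumes devinfo describes
 * a v4.2 part, so packed signal 5 decodes to THRSW + LDTMU per the
 * v41_sig_map above.
 *
 *   struct v3d_qpu_sig sig;
 *   uint32_t repacked;
 *   if (v3d_qpu_sig_unpack(devinfo, 5, &sig)) {
 *           assert(sig.thrsw && sig.ldtmu);
 *           if (v3d_qpu_sig_pack(devinfo, &sig, &repacked))
 *                   assert(repacked == 5);
 *   }
 */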

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 71)
                map = v71_sig_map;
        else if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
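
/* Layout note with examples (illustrative): the 48 packed values decode as
 * three groups of 16:
 *
 *   index  0-15 -> integers 0..15     (e.g. small_immediates[7]  == 7)
 *   index 16-31 -> integers -16..-1   (e.g. small_immediates[16] ==
 *                  (uint32_t)-16)
 *   index 32-47 -> float bit patterns (e.g. small_immediates[40] ==
 *                  0x3f800000, the IEEE-754 encoding of 1.0f == 2.0^0)
 */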

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
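
/* Worked example (illustrative): packed_cond == 0x35 (0b0110101) matches the
 * "packed_cond >> 4 == 0x3" case above: bits [3:2] == 0x1 give
 * cond->mc == V3D_QPU_COND_IFB, and bits [1:0] == 0x1 give cond->apf == 0x1
 * (PUSHZ, per the enum ordering the direct assignment relies on).
 */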

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}

/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field and, in the case of 0- or
 * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as
 * well.
 */
#define OP_MASK(val) BITFIELD64_BIT(val)
#define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, (top) - (bot) + 1)
#define ANYMUX OP_RANGE(0, 7)
#define ANYOPMASK OP_RANGE(0, 63)
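
/* What these expand to (illustrative): OP_MASK(3) is bit 3 only (0x8),
 * OP_RANGE(4, 6) is bits 4..6 (0x70), ANYMUX covers all 8 mux encodings
 * (0xff), and ANYOPMASK covers all 64 raddr encodings (every bit set).
 */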

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;

        union {
                struct {
                        uint8_t b_mask;
                        uint8_t a_mask;
                } mux;
                uint64_t raddr_mask;
        };

        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
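
/* Example reading of an entry (illustrative), taken from add_ops_v33 below:
 *
 *   { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3),
 *     V3D_QPU_A_SAMPID, 40 },
 *
 * matches opcode 187 with mux_b == 2 and mux_a == 3, decodes to SAMPID, and
 * is only valid from V3D 4.0 on (first_ver == 40, last_ver == 0).
 */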

static const struct opcode_desc add_ops_v33[] = {
        /* FADD becomes FADDNF depending on the order of mux_a/mux_b. */
        { 0,   47,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR },
        /* FMIN becomes FMAX depending on the order of mux_a/mux_b. */
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND },
        { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR },
        { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB },

        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD },

        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops_v33[] = {
        { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
        { 15, 15, .mux.b_mask = OP_RANGE(0, 3), .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42},
        { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 },
        { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 },

        { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL },
};

/* Note that it would have been possible to define all the add/mul opcodes in
 * just one table, using first_ver/last_ver. But given how much changed for
 * v71, separate tables are tidier. Also, since we currently do a linear
 * search over these tables, keeping them separate keeps each search shorter.
 *
 * Just in case we ever merge the tables, we define first_ver as 71 for the
 * opcodes that changed in v71.
 */
static const struct opcode_desc add_ops_v71[] = {
        /* FADD becomes FADDNF depending on the order of raddr_a/raddr_b. */
        { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
        { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
        { 53,  55,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 56,  56,  .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
        { 57,  59,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 60,  60,  .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB },
        { 61,  63,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 64,  111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB },
        { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN },
        { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX },
        { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN },
        { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX },
        { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL },
        { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
        { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
        { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
        /* FMIN becomes FMAX depending on the raddr_a/b order. */
        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
        { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },

        { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
        { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
        { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR },
        { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD },
        { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB },

        { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT },
        { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG },
        { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH },
        { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH },
        { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP },
        { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ },
        { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF },
        { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF },

        { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
        { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD },
        { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD },
        { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF },
        { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF },
        { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID },
        { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID },
        { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID },
        { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT },
        { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT },
        { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST },
        { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST },

        { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD },
        { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD },

        { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 },

        { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 },
        { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 },
        { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 },
        { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 },
        { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 },
        { 188, 188, .raddr_mask = OP_MASK(38), V3D_QPU_A_BALLOT, 71 },
        { 188, 188, .raddr_mask = OP_MASK(39), V3D_QPU_A_BCASTF, 71 },
        { 188, 188, .raddr_mask = OP_MASK(40), V3D_QPU_A_ALLEQ, 71 },
        { 188, 188, .raddr_mask = OP_MASK(41), V3D_QPU_A_ALLFEQ, 71 },

        { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 },

        /* The stvpms are distinguished by the waddr field. */
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71},
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71},
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71},

        { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 },

        { 245, 245, .raddr_mask = OP_MASK(3),  V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(7),  V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 },

        { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 },

        { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 },

        { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC },

        { 246, 246, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 },

        { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },

        { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
        { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },

        { 249, 249, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },

        { 249, 249, .raddr_mask = OP_MASK(3),  V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(7),  V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },

        { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
        { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },

        { 252, 252, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROTQ, 71 },
        { 253, 253, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROT, 71 },
        { 254, 254, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHUFFLE, 71 },
};

static const struct opcode_desc mul_ops_v71[] = {
        /* For V3D 7.1, the second mask field is ignored. */
        { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 },
        { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 },
        { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
        { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 },
        { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 },
        { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 },

        { 14, 14, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 },

        { 14, 14, .raddr_mask = OP_MASK(3),  V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(7),  V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },

        { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
        { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
        { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
        { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
        { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
        { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },

        { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },

        { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
};

/* Returns true if op_desc should be filtered out based on devinfo->ver
 * against op_desc->first_ver and op_desc->last_ver. See the notes about
 * first_ver/last_ver in the struct opcode_desc comments.
 */
static bool
opcode_invalid_in_version(const struct v3d_device_info *devinfo,
                          const uint8_t first_ver,
                          const uint8_t last_ver)
{
        return (first_ver != 0 && devinfo->ver < first_ver) ||
                (last_ver != 0  && devinfo->ver > last_ver);
}
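
/* Illustrative examples (values taken from the tables above):
 *   first_ver == 40, last_ver == 0 (e.g. V3D_QPU_A_IID): valid on v4.0 and
 *     later, so filtered out when devinfo->ver == 33.
 *   first_ver == 33, last_ver == 42 (e.g. the mux-encoded V3D_QPU_M_FMOV):
 *     filtered out when devinfo->ver == 71.
 */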

/* Note that we pass mux_a, mux_b and raddr as parameters even though,
 * depending on devinfo->ver, some of them are ignored. We do it this way to
 * avoid having two nearly identical lookup_opcode methods.
 */
static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *opcodes,
                          size_t num_opcodes, uint32_t opcode,
                          uint32_t mux_a, uint32_t mux_b,
                          uint32_t raddr)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
                        continue;

                if (devinfo->ver < 71) {
                        if (!(op_desc->mux.b_mask & (1 << mux_b)))
                                continue;

                        if (!(op_desc->mux.a_mask & (1 << mux_a)))
                                continue;
                } else {
                        if (!(op_desc->raddr_mask & ((uint64_t) 1 << raddr)))
                                continue;
                }

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_int32_unpack_unpack(uint32_t packed,
                            enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_UL;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_UH;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_IL;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_IH;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_UL:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_UH:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_IL:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_IH:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d33_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, add_ops_v33,
                                          ARRAY_SIZE(add_ops_v33),
                                          map_op, mux_a, mux_b, 0);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
         * operands.
         */
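        /* Illustrative example: for op == 142 (an FMIN encoding),
         * ((op >> 2) & 3) == 3 and (op & 3) == 2, so with mux_a == 1 and
         * mux_b == 7 we compare 3 * 8 + 1 == 25 against 2 * 8 + 7 == 23;
         * 25 > 23, so the instruction decodes as FMAX instead.
         */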
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

1102         /* Some QPU ops require a bit more than just basic opcode and mux a/b
1103          * comparisons to distinguish them.
1104          */
1105         switch (instr->alu.add.op) {
1106         case V3D_QPU_A_STVPMV:
1107         case V3D_QPU_A_STVPMD:
1108         case V3D_QPU_A_STVPMP:
1109                 switch (waddr) {
1110                 case 0:
1111                         instr->alu.add.op = V3D_QPU_A_STVPMV;
1112                         break;
1113                 case 1:
1114                         instr->alu.add.op = V3D_QPU_A_STVPMD;
1115                         break;
1116                 case 2:
1117                         instr->alu.add.op = V3D_QPU_A_STVPMP;
1118                         break;
1119                 default:
1120                         return false;
1121                 }
1122                 break;
1123         default:
1124                 break;
1125         }
1126 
1127         switch (instr->alu.add.op) {
1128         case V3D_QPU_A_FADD:
1129         case V3D_QPU_A_FADDNF:
1130         case V3D_QPU_A_FSUB:
1131         case V3D_QPU_A_FMIN:
1132         case V3D_QPU_A_FMAX:
1133         case V3D_QPU_A_FCMP:
1134         case V3D_QPU_A_VFPACK:
1135                 if (instr->alu.add.op != V3D_QPU_A_VFPACK)
1136                         instr->alu.add.output_pack = (op >> 4) & 0x3;
1137                 else
1138                         instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1139 
1140                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1141                                                    &instr->alu.add.a.unpack)) {
1142                         return false;
1143                 }
1144 
1145                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1146                                                    &instr->alu.add.b.unpack)) {
1147                         return false;
1148                 }
1149                 break;
1150 
1151         case V3D_QPU_A_FFLOOR:
1152         case V3D_QPU_A_FROUND:
1153         case V3D_QPU_A_FTRUNC:
1154         case V3D_QPU_A_FCEIL:
1155         case V3D_QPU_A_FDX:
1156         case V3D_QPU_A_FDY:
1157                 instr->alu.add.output_pack = mux_b & 0x3;
1158 
1159                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1160                                                    &instr->alu.add.a.unpack)) {
1161                         return false;
1162                 }
1163                 break;
1164 
1165         case V3D_QPU_A_FTOIN:
1166         case V3D_QPU_A_FTOIZ:
1167         case V3D_QPU_A_FTOUZ:
1168         case V3D_QPU_A_FTOC:
1169                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1170 
1171                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1172                                                    &instr->alu.add.a.unpack)) {
1173                         return false;
1174                 }
1175                 break;
1176 
1177         case V3D_QPU_A_VFMIN:
1178         case V3D_QPU_A_VFMAX:
1179                 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
1180                                                    &instr->alu.add.a.unpack)) {
1181                         return false;
1182                 }
1183 
1184                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1185                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1186                 break;
1187 
1188         default:
1189                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1190                 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1191                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1192                 break;
1193         }
1194 
1195         instr->alu.add.a.mux = mux_a;
1196         instr->alu.add.b.mux = mux_b;
1197         instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1198 
1199         instr->alu.add.magic_write = false;
1200         if (packed_inst & V3D_QPU_MA) {
1201                 switch (instr->alu.add.op) {
1202                 case V3D_QPU_A_LDVPMV_IN:
1203                         instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
1204                         break;
1205                 case V3D_QPU_A_LDVPMD_IN:
1206                         instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
1207                         break;
1208                 case V3D_QPU_A_LDVPMG_IN:
1209                         instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
1210                         break;
1211                 default:
1212                         instr->alu.add.magic_write = true;
1213                         break;
1214                 }
1215         }
1216 
1217         return true;
1218 }
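/* Editorial note: the MA bit does double duty above.  For the LDVPM* loads
 * it selects the *_OUT variant of the opcode rather than flagging a
 * magic-register write, which is why those cases rewrite the op and leave
 * magic_write false.
 */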
1219 
1220 static bool
1221 v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1222                      struct v3d_qpu_instr *instr)
1223 {
1224         uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
1225         uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A);
1226         uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B);
1227         uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1228         uint32_t map_op = op;
1229 
1230         const struct opcode_desc *desc =
1231                 lookup_opcode_from_packed(devinfo,
1232                                           add_ops_v71,
1233                                           ARRAY_SIZE(add_ops_v71),
1234                                           map_op, 0, 0,
1235                                           raddr_b);
1236         if (!desc)
1237                 return false;
1238 
1239         instr->alu.add.op = desc->op;
1240 
1241         /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
1242          * operands.
1243          */
1244         if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
1245             instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
1246                 if (instr->alu.add.op == V3D_QPU_A_FMIN)
1247                         instr->alu.add.op = V3D_QPU_A_FMAX;
1248                 if (instr->alu.add.op == V3D_QPU_A_FADD)
1249                         instr->alu.add.op = V3D_QPU_A_FADDNF;
1250         }
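        /* Editorial worked example: on v71 the ordering keys fold in the
         * small-immediate flags.  With small_imm_a = 0, op bits [3:2] = 1 and
         * raddr_a = 10 the left key is 0 * 256 + 1 * 64 + 10 = 74; with
         * small_imm_b = 1, op bits [1:0] = 0 and raddr_b = 10 the right key
         * is 256 + 0 + 10 = 266, so the FMIN/FADD reading is kept.
         */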
1251 
1252         /* Some QPU ops require a bit more than just basic opcode and raddr a/b
1253          * comparisons to distinguish them.
1254          */
1255         switch (instr->alu.add.op) {
1256         case V3D_QPU_A_STVPMV:
1257         case V3D_QPU_A_STVPMD:
1258         case V3D_QPU_A_STVPMP:
1259                 switch (waddr) {
1260                 case 0:
1261                         instr->alu.add.op = V3D_QPU_A_STVPMV;
1262                         break;
1263                 case 1:
1264                         instr->alu.add.op = V3D_QPU_A_STVPMD;
1265                         break;
1266                 case 2:
1267                         instr->alu.add.op = V3D_QPU_A_STVPMP;
1268                         break;
1269                 default:
1270                         return false;
1271                 }
1272                 break;
1273         default:
1274                 break;
1275         }
1276 
1277         switch (instr->alu.add.op) {
1278         case V3D_QPU_A_FADD:
1279         case V3D_QPU_A_FADDNF:
1280         case V3D_QPU_A_FSUB:
1281         case V3D_QPU_A_FMIN:
1282         case V3D_QPU_A_FMAX:
1283         case V3D_QPU_A_FCMP:
1284         case V3D_QPU_A_VFPACK:
1285                 if (instr->alu.add.op != V3D_QPU_A_VFPACK &&
1286                     instr->alu.add.op != V3D_QPU_A_FCMP) {
1287                         instr->alu.add.output_pack = (op >> 4) & 0x3;
1288                 } else {
1289                         instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1290                 }
1291 
1292                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1293                                                    &instr->alu.add.a.unpack)) {
1294                         return false;
1295                 }
1296 
1297                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1298                                                    &instr->alu.add.b.unpack)) {
1299                         return false;
1300                 }
1301                 break;
1302 
1303         case V3D_QPU_A_FFLOOR:
1304         case V3D_QPU_A_FROUND:
1305         case V3D_QPU_A_FTRUNC:
1306         case V3D_QPU_A_FCEIL:
1307         case V3D_QPU_A_FDX:
1308         case V3D_QPU_A_FDY:
1309                 instr->alu.add.output_pack = raddr_b & 0x3;
1310 
1311                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1312                                                    &instr->alu.add.a.unpack)) {
1313                         return false;
1314                 }
1315                 break;
1316 
1317         case V3D_QPU_A_FTOIN:
1318         case V3D_QPU_A_FTOIZ:
1319         case V3D_QPU_A_FTOUZ:
1320         case V3D_QPU_A_FTOC:
1321                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1322 
1323                 if (!v3d_qpu_float32_unpack_unpack((raddr_b >> 2) & 0x3,
1324                                                    &instr->alu.add.a.unpack)) {
1325                         return false;
1326                 }
1327                 break;
1328 
1329         case V3D_QPU_A_VFMIN:
1330         case V3D_QPU_A_VFMAX:
1331                 unreachable("pending v71 update");
1332                 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
1333                                                    &instr->alu.add.a.unpack)) {
1334                         return false;
1335                 }
1336 
1337                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1338                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1339                 break;
1340 
1341         case V3D_QPU_A_MOV:
1342                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1343 
1344                 if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
1345                                                  &instr->alu.add.a.unpack)) {
1346                         return false;
1347                 }
1348                 break;
1349 
1350         case V3D_QPU_A_FMOV:
1351                 instr->alu.add.output_pack = raddr_b & 0x3;
1352 
1353                 /* Mul alu FMOV has one additional variant */
1354                 int32_t unpack = (raddr_b >> 2) & 0x7;
1355                 if (unpack == 7)
1356                         return false;
1357 
1358                 if (!v3d_qpu_float32_unpack_unpack(unpack,
1359                                                    &instr->alu.add.a.unpack)) {
1360                         return false;
1361                 }
1362                 break;
1363 
1364         default:
1365                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1366                 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1367                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1368                 break;
1369         }
1370 
1371         instr->alu.add.a.raddr = raddr_a;
1372         instr->alu.add.b.raddr = raddr_b;
1373         instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1374 
1375         instr->alu.add.magic_write = false;
1376         if (packed_inst & V3D_QPU_MA) {
1377                 switch (instr->alu.add.op) {
1378                 case V3D_QPU_A_LDVPMV_IN:
1379                         instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
1380                         break;
1381                 case V3D_QPU_A_LDVPMD_IN:
1382                         instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
1383                         break;
1384                 case V3D_QPU_A_LDVPMG_IN:
1385                         instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
1386                         break;
1387                 default:
1388                         instr->alu.add.magic_write = true;
1389                         break;
1390                 }
1391         }
1392 
1393         return true;
1394 }
1395 
1396 static bool
1397 v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1398                    struct v3d_qpu_instr *instr)
1399 {
1400         if (devinfo->ver < 71)
1401                 return v3d33_qpu_add_unpack(devinfo, packed_inst, instr);
1402         else
1403                 return v3d71_qpu_add_unpack(devinfo, packed_inst, instr);
1404 }
1405 
1406 static bool
1407 v3d33_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1408                      struct v3d_qpu_instr *instr)
1409 {
1410         uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
1411         uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
1412         uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);
1413 
1414         {
1415                 const struct opcode_desc *desc =
1416                         lookup_opcode_from_packed(devinfo,
1417                                                   mul_ops_v33,
1418                                                   ARRAY_SIZE(mul_ops_v33),
1419                                                   op, mux_a, mux_b, 0);
1420                 if (!desc)
1421                         return false;
1422 
1423                 instr->alu.mul.op = desc->op;
1424         }
1425 
1426         switch (instr->alu.mul.op) {
1427         case V3D_QPU_M_FMUL:
1428                 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
1429 
1430                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1431                                                    &instr->alu.mul.a.unpack)) {
1432                         return false;
1433                 }
1434 
1435                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1436                                                    &instr->alu.mul.b.unpack)) {
1437                         return false;
1438                 }
1439 
1440                 break;
1441 
1442         case V3D_QPU_M_FMOV:
1443                 instr->alu.mul.output_pack = (((op & 1) << 1) +
1444                                               ((mux_b >> 2) & 1));
1445 
1446                 if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
1447                                                    &instr->alu.mul.a.unpack)) {
1448                         return false;
1449                 }
1450 
1451                 break;
1452 
1453         case V3D_QPU_M_VFMUL:
1454                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1455 
1456                 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
1457                                                    &instr->alu.mul.a.unpack)) {
1458                         return false;
1459                 }
1460 
1461                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1462 
1463                 break;
1464 
1465         default:
1466                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1467                 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1468                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1469                 break;
1470         }
1471 
1472         instr->alu.mul.a.mux = mux_a;
1473         instr->alu.mul.b.mux = mux_b;
1474         instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
1475         instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
1476 
1477         return true;
1478 }
1479 
1480 static bool
1481 v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1482                      struct v3d_qpu_instr *instr)
1483 {
1484         uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
1485         uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C);
1486         uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D);
1487 
1488         {
1489                 const struct opcode_desc *desc =
1490                         lookup_opcode_from_packed(devinfo,
1491                                                   mul_ops_v71,
1492                                                   ARRAY_SIZE(mul_ops_v71),
1493                                                   op, 0, 0,
1494                                                   raddr_d);
1495                 if (!desc)
1496                         return false;
1497 
1498                 instr->alu.mul.op = desc->op;
1499         }
1500 
1501         switch (instr->alu.mul.op) {
1502         case V3D_QPU_M_FMUL:
1503                 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
1504 
1505                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
1506                                                    &instr->alu.mul.a.unpack)) {
1507                         return false;
1508                 }
1509 
1510                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
1511                                                    &instr->alu.mul.b.unpack)) {
1512                         return false;
1513                 }
1514 
1515                 break;
1516 
1517         case V3D_QPU_M_FMOV:
1518                 instr->alu.mul.output_pack = raddr_d & 0x3;
1519 
1520                 if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7,
1521                                                    &instr->alu.mul.a.unpack)) {
1522                         return false;
1523                 }
1524 
1525                 break;
1526 
1527         case V3D_QPU_M_VFMUL:
1528                 unreachable("pending v71 update");
1529                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1530 
1531                 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
1532                                                    &instr->alu.mul.a.unpack)) {
1533                         return false;
1534                 }
1535 
1536                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1537 
1538                 break;
1539 
1540         case V3D_QPU_M_MOV:
1541                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1542 
1543                 if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
1544                                                  &instr->alu.mul.a.unpack)) {
1545                         return false;
1546                 }
1547                 break;
1548 
1549         default:
1550                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1551                 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1552                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1553                 break;
1554         }
1555 
1556         instr->alu.mul.a.raddr = raddr_c;
1557         instr->alu.mul.b.raddr = raddr_d;
1558         instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
1559         instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
1560 
1561         return true;
1562 }
1563 
1564 static bool
1565 v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1566                    struct v3d_qpu_instr *instr)
1567 {
1568         if (devinfo->ver < 71)
1569                 return v3d33_qpu_mul_unpack(devinfo, packed_inst, instr);
1570         else
1571                 return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr);
1572 }
1573 
1574 static const struct opcode_desc *
1575 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
1576                          const struct opcode_desc *opcodes, size_t num_opcodes,
1577                          uint8_t op)
1578 {
1579         for (int i = 0; i < num_opcodes; i++) {
1580                 const struct opcode_desc *op_desc = &opcodes[i];
1581 
1582                 if (op_desc->op != op)
1583                         continue;
1584 
1585                 if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
1586                         continue;
1587 
1588                 return op_desc;
1589         }
1590 
1591         return NULL;
1592 }
1593 
1594 static bool
1595 v3d33_qpu_add_pack(const struct v3d_device_info *devinfo,
1596                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1597 {
1598         uint32_t waddr = instr->alu.add.waddr;
1599         uint32_t mux_a = instr->alu.add.a.mux;
1600         uint32_t mux_b = instr->alu.add.b.mux;
1601         int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1602         const struct opcode_desc *desc =
1603                 lookup_opcode_from_instr(devinfo, add_ops_v33,
1604                                          ARRAY_SIZE(add_ops_v33),
1605                                          instr->alu.add.op);
1606 
1607         if (!desc)
1608                 return false;
1609 
1610         uint32_t opcode = desc->opcode_first;
1611 
1612         /* If an operation doesn't use an arg, its mux values may be used to
1613          * identify the operation type.
1614          */
1615         if (nsrc < 2)
1616                 mux_b = ffs(desc->mux.b_mask) - 1;
1617 
1618         if (nsrc < 1)
1619                 mux_a = ffs(desc->mux.a_mask) - 1;
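        /* Editorial example: ffs() returns the 1-based index of the lowest
         * set bit, so a mask of 0b100 yields ffs() - 1 = 2, i.e. the single
         * mux value the opcode table allows for the unused operand.
         */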
1620 
1621         bool no_magic_write = false;
1622 
1623         switch (instr->alu.add.op) {
1624         case V3D_QPU_A_STVPMV:
1625                 waddr = 0;
1626                 no_magic_write = true;
1627                 break;
1628         case V3D_QPU_A_STVPMD:
1629                 waddr = 1;
1630                 no_magic_write = true;
1631                 break;
1632         case V3D_QPU_A_STVPMP:
1633                 waddr = 2;
1634                 no_magic_write = true;
1635                 break;
1636 
1637         case V3D_QPU_A_LDVPMV_IN:
1638         case V3D_QPU_A_LDVPMD_IN:
1639         case V3D_QPU_A_LDVPMP:
1640         case V3D_QPU_A_LDVPMG_IN:
1641                 assert(!instr->alu.add.magic_write);
1642                 break;
1643 
1644         case V3D_QPU_A_LDVPMV_OUT:
1645         case V3D_QPU_A_LDVPMD_OUT:
1646         case V3D_QPU_A_LDVPMG_OUT:
1647                 assert(!instr->alu.add.magic_write);
1648                 *packed_instr |= V3D_QPU_MA;
1649                 break;
1650 
1651         default:
1652                 break;
1653         }
1654 
1655         switch (instr->alu.add.op) {
1656         case V3D_QPU_A_FADD:
1657         case V3D_QPU_A_FADDNF:
1658         case V3D_QPU_A_FSUB:
1659         case V3D_QPU_A_FMIN:
1660         case V3D_QPU_A_FMAX:
1661         case V3D_QPU_A_FCMP: {
1662                 uint32_t output_pack;
1663                 uint32_t a_unpack;
1664                 uint32_t b_unpack;
1665 
1666                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1667                                                &output_pack)) {
1668                         return false;
1669                 }
1670                 opcode |= output_pack << 4;
1671 
1672                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1673                                                  &a_unpack)) {
1674                         return false;
1675                 }
1676 
1677                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1678                                                  &b_unpack)) {
1679                         return false;
1680                 }
1681 
1682                 /* These operations with commutative operands are
1683                  * distinguished by which order their operands come in.
1684                  */
1685                 bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
1686                 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1687                       instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1688                     ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1689                       instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1690                         uint32_t temp;
1691 
1692                         temp = a_unpack;
1693                         a_unpack = b_unpack;
1694                         b_unpack = temp;
1695 
1696                         temp = mux_a;
1697                         mux_a = mux_b;
1698                         mux_b = temp;
1699                 }
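                /* Editorial example: packing FMAX with keys a_unpack * 8 +
                 * mux_a = 1 and b_unpack * 8 + mux_b = 3 finds ordering ==
                 * false, so the operands are swapped to make the left key the
                 * larger one, matching the unpack rule above.
                 */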
1700 
1701                 opcode |= a_unpack << 2;
1702                 opcode |= b_unpack << 0;
1703 
1704                 break;
1705         }
1706 
1707         case V3D_QPU_A_VFPACK: {
1708                 uint32_t a_unpack;
1709                 uint32_t b_unpack;
1710 
1711                 if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
1712                     instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
1713                         return false;
1714                 }
1715 
1716                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1717                                                  &a_unpack)) {
1718                         return false;
1719                 }
1720 
1721                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1722                                                  &b_unpack)) {
1723                         return false;
1724                 }
1725 
1726                 opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
1727                 opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
1728 
1729                 break;
1730         }
1731 
1732         case V3D_QPU_A_FFLOOR:
1733         case V3D_QPU_A_FROUND:
1734         case V3D_QPU_A_FTRUNC:
1735         case V3D_QPU_A_FCEIL:
1736         case V3D_QPU_A_FDX:
1737         case V3D_QPU_A_FDY: {
1738                 uint32_t packed;
1739 
1740                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1741                                                &packed)) {
1742                         return false;
1743                 }
1744                 mux_b |= packed;
1745 
1746                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1747                                                  &packed)) {
1748                         return false;
1749                 }
1750                 if (packed == 0)
1751                         return false;
1752                 opcode = (opcode & ~(0x3 << 2)) | packed << 2;
1753                 break;
1754         }
1755 
1756         case V3D_QPU_A_FTOIN:
1757         case V3D_QPU_A_FTOIZ:
1758         case V3D_QPU_A_FTOUZ:
1759         case V3D_QPU_A_FTOC:
1760                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1761                         return false;
1762 
1763                 uint32_t packed;
1764                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1765                                                  &packed)) {
1766                         return false;
1767                 }
1768                 if (packed == 0)
1769                         return false;
1770                 opcode |= packed << 2;
1771 
1772                 break;
1773 
1774         case V3D_QPU_A_VFMIN:
1775         case V3D_QPU_A_VFMAX:
1776                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1777                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
1778                         return false;
1779                 }
1780 
1781                 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
1782                                                  &packed)) {
1783                         return false;
1784                 }
1785                 opcode |= packed;
1786                 break;
1787 
1788         default:
1789                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
1790                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1791                      instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
1792                      instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
1793                         return false;
1794                 }
1795                 break;
1796         }
1797 
1798         *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
1799         *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
1800         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
1801         *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
1802         if (instr->alu.add.magic_write && !no_magic_write)
1803                 *packed_instr |= V3D_QPU_MA;
1804 
1805         return true;
1806 }
1807 
1808 static bool
1809 v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
1810                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1811 {
1812         uint32_t waddr = instr->alu.add.waddr;
1813         uint32_t raddr_a = instr->alu.add.a.raddr;
1814         uint32_t raddr_b = instr->alu.add.b.raddr;
1815 
1816         int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1817         const struct opcode_desc *desc =
1818                 lookup_opcode_from_instr(devinfo, add_ops_v71,
1819                                          ARRAY_SIZE(add_ops_v71),
1820                                          instr->alu.add.op);
1821         if (!desc)
1822                 return false;
1823 
1824         uint32_t opcode = desc->opcode_first;
1825 
1826         /* If an operation doesn't use an arg, its raddr values may be used to
1827          * identify the operation type.
1828          */
1829         if (nsrc < 2)
1830                 raddr_b = ffsll(desc->raddr_mask) - 1;
1831 
1832         bool no_magic_write = false;
1833 
1834         switch (instr->alu.add.op) {
1835         case V3D_QPU_A_STVPMV:
1836                 waddr = 0;
1837                 no_magic_write = true;
1838                 break;
1839         case V3D_QPU_A_STVPMD:
1840                 waddr = 1;
1841                 no_magic_write = true;
1842                 break;
1843         case V3D_QPU_A_STVPMP:
1844                 waddr = 2;
1845                 no_magic_write = true;
1846                 break;
1847 
1848         case V3D_QPU_A_LDVPMV_IN:
1849         case V3D_QPU_A_LDVPMD_IN:
1850         case V3D_QPU_A_LDVPMP:
1851         case V3D_QPU_A_LDVPMG_IN:
1852                 assert(!instr->alu.add.magic_write);
1853                 break;
1854 
1855         case V3D_QPU_A_LDVPMV_OUT:
1856         case V3D_QPU_A_LDVPMD_OUT:
1857         case V3D_QPU_A_LDVPMG_OUT:
1858                 assert(!instr->alu.add.magic_write);
1859                 *packed_instr |= V3D_QPU_MA;
1860                 break;
1861 
1862         default:
1863                 break;
1864         }
1865 
1866         switch (instr->alu.add.op) {
1867         case V3D_QPU_A_FADD:
1868         case V3D_QPU_A_FADDNF:
1869         case V3D_QPU_A_FSUB:
1870         case V3D_QPU_A_FMIN:
1871         case V3D_QPU_A_FMAX:
1872         case V3D_QPU_A_FCMP: {
1873                 uint32_t output_pack;
1874                 uint32_t a_unpack;
1875                 uint32_t b_unpack;
1876 
1877                 if (instr->alu.add.op != V3D_QPU_A_FCMP) {
1878                         if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1879                                                        &output_pack)) {
1880                                 return false;
1881                         }
1882                         opcode |= output_pack << 4;
1883                 }
1884 
1885                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1886                                                  &a_unpack)) {
1887                         return false;
1888                 }
1889 
1890                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1891                                                  &b_unpack)) {
1892                         return false;
1893                 }
1894 
1895                 /* These operations with commutative operands are
1896                  * distinguished by which order their operands come in.
1897                  */
1898                 bool ordering =
1899                         instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
1900                         instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
1901                 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1902                       instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1903                     ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1904                       instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1905                         uint32_t temp;
1906 
1907                         temp = a_unpack;
1908                         a_unpack = b_unpack;
1909                         b_unpack = temp;
1910 
1911                         temp = raddr_a;
1912                         raddr_a = raddr_b;
1913                         raddr_b = temp;
1914 
1915                         /* If we are swapping raddr_a/b we also need to swap
1916                          * small_imm_a/b.
1917                          */
1918                         if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
1919                                 assert(instr->sig.small_imm_a !=
1920                                        instr->sig.small_imm_b);
1921                                 struct v3d_qpu_sig new_sig = instr->sig;
1922                                 new_sig.small_imm_a = !instr->sig.small_imm_a;
1923                                 new_sig.small_imm_b = !instr->sig.small_imm_b;
1924                                 uint32_t sig;
1925                                 if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
1926                                         return false;
1927                                 *packed_instr &= ~V3D_QPU_SIG_MASK;
1928                                 *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
1929                         }
1930                 }
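                /* Editorial note on the small_imm swap above:
                 * v3d_qpu_instr_pack_alu() has already written the signature
                 * bits by the time the add pack runs, so swapping
                 * small_imm_a/b has to clear and re-encode the packed SIG
                 * field in place.
                 */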
1931 
1932                 opcode |= a_unpack << 2;
1933                 opcode |= b_unpack << 0;
1934 
1935                 break;
1936         }
1937 
1938         case V3D_QPU_A_VFPACK: {
1939                 uint32_t a_unpack;
1940                 uint32_t b_unpack;
1941 
1942                 if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
1943                     instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
1944                         return false;
1945                 }
1946 
1947                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1948                                                  &a_unpack)) {
1949                         return false;
1950                 }
1951 
1952                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack,
1953                                                  &b_unpack)) {
1954                         return false;
1955                 }
1956 
1957                 opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
1958                 opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
1959 
1960                 break;
1961         }
1962 
1963         case V3D_QPU_A_FFLOOR:
1964         case V3D_QPU_A_FROUND:
1965         case V3D_QPU_A_FTRUNC:
1966         case V3D_QPU_A_FCEIL:
1967         case V3D_QPU_A_FDX:
1968         case V3D_QPU_A_FDY: {
1969                 uint32_t packed;
1970 
1971                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1972                                                &packed)) {
1973                         return false;
1974                 }
1975                 raddr_b |= packed;
1976 
1977                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1978                                                  &packed)) {
1979                         return false;
1980                 }
1981                 if (packed == 0)
1982                         return false;
1983                 raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
1984                 break;
1985         }
1986 
1987         case V3D_QPU_A_FTOIN:
1988         case V3D_QPU_A_FTOIZ:
1989         case V3D_QPU_A_FTOUZ:
1990         case V3D_QPU_A_FTOC:
1991                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1992                         return false;
1993 
1994                 uint32_t packed;
1995                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
1996                                                  &packed)) {
1997                         return false;
1998                 }
1999                 if (packed == 0)
2000                         return false;
2001 
2002                 raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
2003 
2004                 break;
2005 
2006         case V3D_QPU_A_VFMIN:
2007         case V3D_QPU_A_VFMAX:
2008                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
2009                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
2010                         return false;
2011                 }
2012 
2013                 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
2014                                                  &packed)) {
2015                         return false;
2016                 }
2017                 opcode |= packed;
2018                 break;
2019 
2020         case V3D_QPU_A_MOV: {
2021                 uint32_t packed;
2022 
2023                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
2024                         return false;
2025 
2026                 if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
2027                                                &packed)) {
2028                         return false;
2029                 }
2030 
2031                 raddr_b |= packed << 2;
2032                 break;
2033         }
2034 
2035         case V3D_QPU_A_FMOV: {
2036                 uint32_t packed;
2037 
2038                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
2039                                                &packed)) {
2040                         return false;
2041                 }
2042                 raddr_b = packed;
2043 
2044                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack,
2045                                                  &packed)) {
2046                         return false;
2047                 }
2048                 raddr_b |= packed << 2;
2049                 break;
2050         }
2051 
2052         default:
2053                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
2054                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
2055                      instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
2056                      instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
2057                         return false;
2058                 }
2059                 break;
2060         }
2061 
2062         *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A);
2063         *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B);
2064         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
2065         *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
2066         if (instr->alu.add.magic_write && !no_magic_write)
2067                 *packed_instr |= V3D_QPU_MA;
2068 
2069         return true;
2070 }
2071 
2072 static bool
2073 v3d33_qpu_mul_pack(const struct v3d_device_info *devinfo,
2074                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2075 {
2076         uint32_t mux_a = instr->alu.mul.a.mux;
2077         uint32_t mux_b = instr->alu.mul.b.mux;
2078         int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
2079 
2080         const struct opcode_desc *desc =
2081                 lookup_opcode_from_instr(devinfo, mul_ops_v33,
2082                                          ARRAY_SIZE(mul_ops_v33),
2083                                          instr->alu.mul.op);
2084 
2085         if (!desc)
2086                 return false;
2087 
2088         uint32_t opcode = desc->opcode_first;
2089 
2090         /* Some opcodes have a single valid value for their mux a/b, so set
2091          * that here.  If mux a/b determine packing, it will be set below.
2092          */
2093         if (nsrc < 2)
2094                 mux_b = ffs(desc->mux.b_mask) - 1;
2095 
2096         if (nsrc < 1)
2097                 mux_a = ffs(desc->mux.a_mask) - 1;
2098 
2099         switch (instr->alu.mul.op) {
2100         case V3D_QPU_M_FMUL: {
2101                 uint32_t packed;
2102 
2103                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2104                                                &packed)) {
2105                         return false;
2106                 }
2107                 /* No need for a +1 because desc->opcode_first has a 1 in this
2108                  * field.
2109                  */
2110                 opcode += packed << 4;
2111 
2112                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2113                                                  &packed)) {
2114                         return false;
2115                 }
2116                 opcode |= packed << 2;
2117 
2118                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
2119                                                  &packed)) {
2120                         return false;
2121                 }
2122                 opcode |= packed << 0;
2123                 break;
2124         }
2125 
2126         case V3D_QPU_M_FMOV: {
2127                 uint32_t packed;
2128 
2129                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2130                                                &packed)) {
2131                         return false;
2132                 }
2133                 opcode |= (packed >> 1) & 1;
2134                 mux_b = (packed & 1) << 2;
2135 
2136                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2137                                                  &packed)) {
2138                         return false;
2139                 }
2140                 mux_b |= packed;
2141                 break;
2142         }
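        /* Editorial worked example: FMOV splits the 2-bit output pack across
         * the encoding.  packed = 2 (0b10) sets the opcode's low bit
         * ((packed >> 1) & 1 = 1) and leaves mux_b bit 2 clear
         * ((packed & 1) << 2 = 0), the exact inverse of the
         * (((op & 1) << 1) + ((mux_b >> 2) & 1)) unpack above.
         */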
2143 
2144         case V3D_QPU_M_VFMUL: {
2145                 uint32_t packed;
2146 
2147                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2148                         return false;
2149 
2150                 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
2151                                                  &packed)) {
2152                         return false;
2153                 }
2154                 if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
2155                         opcode = 8;
2156                 else
2157                         opcode |= (packed + 4) & 7;
2158 
2159                 if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
2160                         return false;
2161 
2162                 break;
2163         }
2164 
2165         default:
2166                 if (instr->alu.mul.op != V3D_QPU_M_NOP &&
2167                     (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
2168                      instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
2169                      instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
2170                         return false;
2171                 }
2172                 break;
2173         }
2174 
2175         *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
2176         *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);
2177 
2178         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
2179         *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
2180         if (instr->alu.mul.magic_write)
2181                 *packed_instr |= V3D_QPU_MM;
2182 
2183         return true;
2184 }
2185 
2186 static bool
2187 v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
2188                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2189 {
2190         uint32_t raddr_c = instr->alu.mul.a.raddr;
2191         uint32_t raddr_d = instr->alu.mul.b.raddr;
2192         int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
2193 
2194         const struct opcode_desc *desc =
2195                 lookup_opcode_from_instr(devinfo, mul_ops_v71,
2196                                          ARRAY_SIZE(mul_ops_v71),
2197                                          instr->alu.mul.op);
2198         if (!desc)
2199                 return false;
2200 
2201         uint32_t opcode = desc->opcode_first;
2202 
2203         /* Some opcodes have a single valid value for their raddr_d, so set
2204          * that here.  If raddr_d determines packing, it will be set below.
2205          */
2206         if (nsrc < 2)
2207                 raddr_d = ffsll(desc->raddr_mask) - 1;
2208 
2209         switch (instr->alu.mul.op) {
2210         case V3D_QPU_M_FMUL: {
2211                 uint32_t packed;
2212 
2213                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2214                                                &packed)) {
2215                         return false;
2216                 }
2217                 /* No need for a +1 because desc->opcode_first has a 1 in this
2218                  * field.
2219                  */
2220                 opcode += packed << 4;
2221 
2222                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2223                                                  &packed)) {
2224                         return false;
2225                 }
2226                 opcode |= packed << 2;
2227 
2228                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack,
2229                                                  &packed)) {
2230                         return false;
2231                 }
2232                 opcode |= packed << 0;
2233                 break;
2234         }
2235 
2236         case V3D_QPU_M_FMOV: {
2237                 uint32_t packed;
2238 
2239                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2240                                                &packed)) {
2241                         return false;
2242                 }
2243                 raddr_d |= packed;
2244 
2245                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack,
2246                                                  &packed)) {
2247                         return false;
2248                 }
2249                 raddr_d |= packed << 2;
2250                 break;
2251         }
2252 
2253         case V3D_QPU_M_VFMUL: {
2254                 unreachable("pending v71 update");
2255                 uint32_t packed;
2256 
2257                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2258                         return false;
2259 
2260                 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
2261                                                  &packed)) {
2262                         return false;
2263                 }
2264                 if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
2265                         opcode = 8;
2266                 else
2267                         opcode |= (packed + 4) & 7;
2268 
2269                 if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
2270                         return false;
2271 
2272                 break;
2273         }
2274 
2275         case V3D_QPU_M_MOV: {
2276                 uint32_t packed;
2277 
2278                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2279                         return false;
2280 
2281                 if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
2282                                                &packed)) {
2283                         return false;
2284                 }
2285 
2286                 raddr_d |= packed << 2;
2287                 break;
2288         }
2289 
2290         default:
2291                 if (instr->alu.mul.op != V3D_QPU_M_NOP &&
2292                     (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
2293                      instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
2294                      instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
2295                         return false;
2296                 }
2297                 break;
2298         }
2299 
2300         *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C);
2301         *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D);
2302         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
2303         *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
2304         if (instr->alu.mul.magic_write)
2305                 *packed_instr |= V3D_QPU_MM;
2306 
2307         return true;
2308 }
2309 
2310 static bool
2311 v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
2312                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2313 {
2314         if (devinfo->ver < 71)
2315                 return v3d33_qpu_add_pack(devinfo, instr, packed_instr);
2316         else
2317                 return v3d71_qpu_add_pack(devinfo, instr, packed_instr);
2318 }
2319 
2320 static bool
2321 v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
2322                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2323 {
2324         if (devinfo->ver < 71)
2325                 return v3d33_qpu_mul_pack(devinfo, instr, packed_instr);
2326         else
2327                 return v3d71_qpu_mul_pack(devinfo, instr, packed_instr);
2328 }
2329 
2330 static bool
2331 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
2332                          uint64_t packed_instr,
2333                          struct v3d_qpu_instr *instr)
2334 {
2335         instr->type = V3D_QPU_INSTR_TYPE_ALU;
2336 
2337         if (!v3d_qpu_sig_unpack(devinfo,
2338                                 QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
2339                                 &instr->sig))
2340                 return false;
2341 
2342         uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
2343         if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2344                 instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
2345                 instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
2346 
2347                 instr->flags.ac = V3D_QPU_COND_NONE;
2348                 instr->flags.mc = V3D_QPU_COND_NONE;
2349                 instr->flags.apf = V3D_QPU_PF_NONE;
2350                 instr->flags.mpf = V3D_QPU_PF_NONE;
2351                 instr->flags.auf = V3D_QPU_UF_NONE;
2352                 instr->flags.muf = V3D_QPU_UF_NONE;
2353         } else {
2354                 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
2355                         return false;
2356         }
2357 
2358         if (devinfo->ver < 71) {
2359                 /*
2360                  * For v71 this is set on add/mul unpack instead, as the
2361                  * raddrs are now part of v3d_qpu_input.
2362                  */
2363                 instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
2364                 instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
2365         }
2366 
2367         if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
2368                 return false;
2369 
2370         if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
2371                 return false;
2372 
2373         return true;
2374 }
2375 
2376 static bool
2377 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
2378                             uint64_t packed_instr,
2379                             struct v3d_qpu_instr *instr)
2380 {
2381         instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
2382 
2383         uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
2384         if (cond == 0)
2385                 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
2386         else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
2387                  V3D_QPU_BRANCH_COND_ALLNA)
2388                 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
2389         else
2390                 return false;
2391 
2392         uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
2393         if (msfign == 3)
2394                 return false;
2395         instr->branch.msfign = msfign;
2396 
2397         instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
2398 
2399         instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
2400         if (instr->branch.ub) {
2401                 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
2402                                                   V3D_QPU_BRANCH_BDU);
2403         }
2404 
2405         instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
2406                                               V3D_QPU_RADDR_A);
2407 
2408         instr->branch.offset = 0;
2409 
2410         instr->branch.offset +=
2411                 QPU_GET_FIELD(packed_instr,
2412                               V3D_QPU_BRANCH_ADDR_LOW) << 3;
2413 
2414         instr->branch.offset +=
2415                 QPU_GET_FIELD(packed_instr,
2416                               V3D_QPU_BRANCH_ADDR_HIGH) << 24;
2417 
2418         return true;
2419 }
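/* Editorial worked example: ADDR_LOW carries offset bits 23:3 and ADDR_HIGH
 * bits 31:24, so an offset of 0x01000010 unpacks as ADDR_LOW = 0x2 and
 * ADDR_HIGH = 0x01, and (0x2 << 3) + (0x01 << 24) reassembles 0x01000010.
 */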
2420 
2421 bool
2422 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
2423                      uint64_t packed_instr,
2424                      struct v3d_qpu_instr *instr)
2425 {
2426         if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
2427                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
2428         } else {
2429                 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
2430 
2431                 if ((sig & 24) == 16) {
2432                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
2433                                                            instr);
2434                 } else {
2435                         return false;
2436                 }
2437         }
2438 }
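/* Editorial note: the packing scheme reserves a zero mul opcode for non-ALU
 * words, and within those only signal values 16..23 ((sig & 24) == 16) are
 * branch encodings; anything else is rejected as invalid.
 */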
2439 
2440 static bool
2441 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
2442                        const struct v3d_qpu_instr *instr,
2443                        uint64_t *packed_instr)
2444 {
2445         uint32_t sig;
2446         if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
2447                 return false;
2448         *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
2449 
2450         if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
2451                 if (devinfo->ver < 71) {
2452                         /*
2453                          * For v71 this is set on add/mul pack instead, as
2454                          * the raddrs are now part of v3d_qpu_input.
2455                          */
2456                         *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
2457                         *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
2458                 }
2459 
2460                 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
2461                         return false;
2462                 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
2463                         return false;
2464 
2465                 uint32_t flags;
2466                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2467                         if (instr->flags.ac != V3D_QPU_COND_NONE ||
2468                             instr->flags.mc != V3D_QPU_COND_NONE ||
2469                             instr->flags.apf != V3D_QPU_PF_NONE ||
2470                             instr->flags.mpf != V3D_QPU_PF_NONE ||
2471                             instr->flags.auf != V3D_QPU_UF_NONE ||
2472                             instr->flags.muf != V3D_QPU_UF_NONE) {
2473                                 return false;
2474                         }
2475 
2476                         flags = instr->sig_addr;
2477                         if (instr->sig_magic)
2478                                 flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
2479                 } else {
2480                         if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
2481                                 return false;
2482                 }
2483 
2484                 *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
2485         } else {
2486                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
2487                         return false;
2488         }
2489 
2490         return true;
2491 }
2492 
2493 static bool
2494 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
2495                           const struct v3d_qpu_instr *instr,
2496                           uint64_t *packed_instr)
2497 {
2498         *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
2499 
2500         if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
2501                 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
2502                                                     V3D_QPU_BRANCH_COND_A0),
2503                                                V3D_QPU_BRANCH_COND);
2504         }
2505 
2506         *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2507                                        V3D_QPU_BRANCH_MSFIGN);
2508 
2509         *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
2510                                        V3D_QPU_BRANCH_BDI);
2511 
2512         if (instr->branch.ub) {
2513                 *packed_instr |= V3D_QPU_BRANCH_UB;
2514                 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
2515                                                V3D_QPU_BRANCH_BDU);
2516         }
2517 
2518         switch (instr->branch.bdi) {
2519         case V3D_QPU_BRANCH_DEST_ABS:
2520         case V3D_QPU_BRANCH_DEST_REL:
2521                 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2522                                                V3D_QPU_BRANCH_MSFIGN);
2523 
2524                 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
2525                                                 ~0xff000000) >> 3,
2526                                                V3D_QPU_BRANCH_ADDR_LOW);
2527 
2528                 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
2529                                                V3D_QPU_BRANCH_ADDR_HIGH);
2530                 break;
2531         default:
2532                 break;
2533         }
2534 
2535         if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
2536             instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
2537                 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
2538                                                V3D_QPU_RADDR_A);
2539         }
2540 
2541         return true;
2542 }
2543 
2544 bool
2545 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
2546                    const struct v3d_qpu_instr *instr,
2547                    uint64_t *packed_instr)
2548 {
2549         *packed_instr = 0;
2550 
2551         switch (instr->type) {
2552         case V3D_QPU_INSTR_TYPE_ALU:
2553                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
2554         case V3D_QPU_INSTR_TYPE_BRANCH:
2555                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
2556         default:
2557                 return false;
2558         }
2559 }
2560
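/* Editorial usage sketch (not part of the original file): round-trips one
 * 64-bit instruction word through unpack and pack.  It assumes the caller
 * provides a populated v3d_device_info (e.g. ver = 42); for canonically
 * encoded words the repacked bits should match the input.
 */
static bool
v3d_qpu_instr_roundtrip_example(const struct v3d_device_info *devinfo,
                                uint64_t packed)
{
        struct v3d_qpu_instr instr;
        uint64_t repacked;

        /* Decode into the structured form; fails on invalid encodings. */
        if (!v3d_qpu_instr_unpack(devinfo, packed, &instr))
                return false;

        /* Re-encode; v3d_qpu_instr_pack() zeroes *packed_instr itself. */
        if (!v3d_qpu_instr_pack(devinfo, &instr, &repacked))
                return false;

        return repacked == packed;
}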