• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <string.h>
25 #include "util/macros.h"
26 #include "util/bitscan.h"
27 
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
30 
#ifndef QPU_MASK
/* Builds a 64-bit mask covering bit positions [low, high], inclusive. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts a value into a field and asserts it fits within the field's mask. */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts a field from a 64-bit instruction word as a uint32_t. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Replaces the contents of a field in an instruction word. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46 
/* Bit positions of the fields within the 64-bit QPU instruction word.
 * Note that the branch-instruction fields (BRANCH_*) overlap the ALU
 * fields, since the two instruction formats reuse the same bits.
 */
#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
106 
/* Shorthand designated initializers used to keep the signal-map tables
 * below compact and readable.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
121 
/* Decode table for the 5-bit packed signal field on V3D 3.3.  Entries left
 * out (18-21) are reserved encodings; they unpack to all-zero contents,
 * which v3d_qpu_sig_unpack() uses to reject them.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
154 
/* Decode table for the packed signal field on V3D 4.0: drops the LDVPM
 * combinations, adds WRTMUC, and reuses 31 for SMIMM+LDTMU.  Missing
 * entries (12-13, 24-30) are reserved.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
183 
/* Decode table for the packed signal field on V3D 4.1+: adds the
 * register-file variants (LDUNIFRF/LDUNIFARF) and LDUNIFA.  Missing
 * entries (26-30) are reserved.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM,            LDTMU, },
};
215 
216 bool
v3d_qpu_sig_unpack(const struct v3d_device_info * devinfo,uint32_t packed_sig,struct v3d_qpu_sig * sig)217 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
218                    uint32_t packed_sig,
219                    struct v3d_qpu_sig *sig)
220 {
221         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
222                 return false;
223 
224         if (devinfo->ver >= 41)
225                 *sig = v41_sig_map[packed_sig];
226         else if (devinfo->ver == 40)
227                 *sig = v40_sig_map[packed_sig];
228         else
229                 *sig = v33_sig_map[packed_sig];
230 
231         /* Signals with zeroed unpacked contents after element 0 are reserved. */
232         return (packed_sig == 0 ||
233                 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
234 }
235 
236 bool
v3d_qpu_sig_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig,uint32_t * packed_sig)237 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
238                  const struct v3d_qpu_sig *sig,
239                  uint32_t *packed_sig)
240 {
241         static const struct v3d_qpu_sig *map;
242 
243         if (devinfo->ver >= 41)
244                 map = v41_sig_map;
245         else if (devinfo->ver == 40)
246                 map = v40_sig_map;
247         else
248                 map = v33_sig_map;
249 
250         for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
251                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
252                         *packed_sig = i;
253                         return true;
254                 }
255         }
256 
257         return false;
258 }
259 
/* Values encodable in the 6-bit "small immediate" field, indexed by packed
 * value: 0..15, then -16..-1 (the negative constants are stored as their
 * 32-bit two's-complement bit patterns in this uint32_t array), then the
 * float bit patterns for powers of two from 2^-8 to 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
286 
287 bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info * devinfo,uint32_t packed_small_immediate,uint32_t * small_immediate)288 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
289                          uint32_t packed_small_immediate,
290                          uint32_t *small_immediate)
291 {
292         if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
293                 return false;
294 
295         *small_immediate = small_immediates[packed_small_immediate];
296         return true;
297 }
298 
299 bool
v3d_qpu_small_imm_pack(const struct v3d_device_info * devinfo,uint32_t value,uint32_t * packed_small_immediate)300 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
301                        uint32_t value,
302                        uint32_t *packed_small_immediate)
303 {
304         STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
305 
306         for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
307                 if (small_immediates[i] == value) {
308                         *packed_small_immediate = i;
309                         return true;
310                 }
311         }
312 
313         return false;
314 }
315 
/* Decodes the 7-bit packed condition/flags field into the unpacked
 * v3d_qpu_flags representation.  Returns false only for the reserved
 * encoding 0x10.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* Maps the 2-bit condition sub-encoding used by the combined
         * (mul cond + add cond/auf) forms at 0x40 and above.
         */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Default every field to "none"; each branch below fills in only
         * the fields its encoding carries.
         */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                /* 0: no conditions and no flag writes. */
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 1-3: add ALU pushes flags. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 4-15: add ALU updates flags, offset from ANDZ. */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* 0x10: reserved encoding. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mul ALU pushes flags. */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: mul ALU updates flags. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x20-0x2f: add ALU condition in bits 3:2, mul flag push
                 * in bits 1:0.
                 */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x30-0x3f: mul ALU condition in bits 3:2, add flag push
                 * in bits 1:0.
                 */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40-0x7f: mul ALU condition in bits 5:4, plus either an
                 * add ALU condition (when bits 3:2 are zero) or add flag
                 * updates.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
364 
/* Encodes an unpacked v3d_qpu_flags into the 7-bit packed condition
 * field.  Returns false if the combination of fields in use has no
 * encoding (only the combinations in flags_table are representable).
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* Bitmask values describing which of the six flag/condition fields are in
 * use.
 *
 * NOTE(review): these macros are not #undef'd at the end of the function,
 * so they remain defined for the rest of the file — confirm nothing below
 * collides with these names.
 */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* The legal field combinations, each paired with the fixed high
         * bits of its packed encoding.  The field-specific low bits are
         * OR'd in below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Work out which fields are set in the unpacked flags. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* PF_NONE is 0, so these are no-ops when unused. */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Update-flags encodings start at 4, offset from ANDZ. */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC) {
                        /* In the >=0x40 combined form the add condition
                         * lives in bits 1:0, otherwise in bits 3:2.
                         */
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        /* Likewise, the mul condition moves to bits 5:4 in
                         * the >=0x40 combined form.
                         */
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
443 
444 /* Make a mapping of the table of opcodes in the spec.  The opcode is
445  * determined by a combination of the opcode field, and in the case of 0 or
446  * 1-arg opcodes, the mux_b field as well.
447  */
/* Bitmask of accepted mux encodings in the inclusive range [bot, top].
 * Both parameters are fully parenthesized — the previous definition left
 * `top` bare inside `(top + 1)`, so an expression argument like `a | b`
 * would have expanded with the wrong precedence.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
450 
/* Describes how a range of packed opcode values (plus mux constraints)
 * maps to a QPU add/mul opcode.
 */
struct opcode_desc {
        uint8_t opcode_first; /* first packed opcode covered, inclusive */
        uint8_t opcode_last;  /* last packed opcode covered, inclusive */
        uint8_t mux_b_mask;   /* bitmask of accepted mux_b encodings */
        uint8_t mux_a_mask;   /* bitmask of accepted mux_a encodings */
        uint8_t op;           /* the v3d_qpu add/mul opcode enum value */

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
467 
/* Opcode table for the add ALU.  Some opcodes appear twice (FADD/FADDNF,
 * FMIN/FMAX, the STVPM* group): those are disambiguated elsewhere by
 * operand order or the waddr field during unpack.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcodes 186-188 are 0/1-arg ops selected by the mux fields. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
571 
/* Opcode table for the mul ALU, in the same format as add_ops. */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
585 
586 /* Returns true if op_desc should be filtered out based on devinfo->ver
587  * against op_desc->first_ver and op_desc->last_ver. Check notes about
588  * first_ver/last_ver on struct opcode_desc comments.
589  */
590 static bool
opcode_invalid_in_version(const struct v3d_device_info * devinfo,const struct opcode_desc * op_desc)591 opcode_invalid_in_version(const struct v3d_device_info *devinfo,
592                           const struct opcode_desc *op_desc)
593 {
594         return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
595                 (op_desc->last_ver != 0  && devinfo->ver > op_desc->last_ver);
596 }
597 
598 static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint32_t opcode,uint32_t mux_a,uint32_t mux_b)599 lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
600                           const struct opcode_desc *opcodes,
601                           size_t num_opcodes, uint32_t opcode,
602                           uint32_t mux_a, uint32_t mux_b)
603 {
604         for (int i = 0; i < num_opcodes; i++) {
605                 const struct opcode_desc *op_desc = &opcodes[i];
606 
607                 if (opcode < op_desc->opcode_first ||
608                     opcode > op_desc->opcode_last)
609                         continue;
610 
611                 if (opcode_invalid_in_version(devinfo, op_desc))
612                         continue;
613 
614                 if (!(op_desc->mux_b_mask & (1 << mux_b)))
615                         continue;
616 
617                 if (!(op_desc->mux_a_mask & (1 << mux_a)))
618                         continue;
619 
620                 return op_desc;
621         }
622 
623         return NULL;
624 }
625 
626 static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)627 v3d_qpu_float32_unpack_unpack(uint32_t packed,
628                               enum v3d_qpu_input_unpack *unpacked)
629 {
630         switch (packed) {
631         case 0:
632                 *unpacked = V3D_QPU_UNPACK_ABS;
633                 return true;
634         case 1:
635                 *unpacked = V3D_QPU_UNPACK_NONE;
636                 return true;
637         case 2:
638                 *unpacked = V3D_QPU_UNPACK_L;
639                 return true;
640         case 3:
641                 *unpacked = V3D_QPU_UNPACK_H;
642                 return true;
643         default:
644                 return false;
645         }
646 }
647 
648 static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)649 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
650                             uint32_t *packed)
651 {
652         switch (unpacked) {
653         case V3D_QPU_UNPACK_ABS:
654                 *packed = 0;
655                 return true;
656         case V3D_QPU_UNPACK_NONE:
657                 *packed = 1;
658                 return true;
659         case V3D_QPU_UNPACK_L:
660                 *packed = 2;
661                 return true;
662         case V3D_QPU_UNPACK_H:
663                 *packed = 3;
664                 return true;
665         default:
666                 return false;
667         }
668 }
669 
670 static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)671 v3d_qpu_float16_unpack_unpack(uint32_t packed,
672                               enum v3d_qpu_input_unpack *unpacked)
673 {
674         switch (packed) {
675         case 0:
676                 *unpacked = V3D_QPU_UNPACK_NONE;
677                 return true;
678         case 1:
679                 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
680                 return true;
681         case 2:
682                 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
683                 return true;
684         case 3:
685                 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
686                 return true;
687         case 4:
688                 *unpacked = V3D_QPU_UNPACK_SWAP_16;
689                 return true;
690         default:
691                 return false;
692         }
693 }
694 
695 static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)696 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
697                             uint32_t *packed)
698 {
699         switch (unpacked) {
700         case V3D_QPU_UNPACK_NONE:
701                 *packed = 0;
702                 return true;
703         case V3D_QPU_UNPACK_REPLICATE_32F_16:
704                 *packed = 1;
705                 return true;
706         case V3D_QPU_UNPACK_REPLICATE_L_16:
707                 *packed = 2;
708                 return true;
709         case V3D_QPU_UNPACK_REPLICATE_H_16:
710                 *packed = 3;
711                 return true;
712         case V3D_QPU_UNPACK_SWAP_16:
713                 *packed = 4;
714                 return true;
715         default:
716                 return false;
717         }
718 }
719 
/* Encodes a float32 output-pack mode into its 2-bit field value.
 * Returns false for modes with no encoding.
 *
 * NOTE(review): the parameter is declared as enum v3d_qpu_input_unpack
 * but the switch cases are V3D_QPU_PACK_* constants — presumably these
 * belong to a separate output-pack enum; verify the declared type against
 * qpu_instr.h.
 */
static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}
738 
/*
 * Decodes the add-ALU slot of a 64-bit packed instruction into
 * instr->alu.add (op, muxes, waddr, magic_write, pack/unpack modes).
 *
 * Returns false if the opcode/mux combination is not a valid add op for
 * this device version, or if a pack/unpack field is unencodable.
 */
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags: fold opcodes 249-251 and 253-255 back onto the 245
         * base range before the table lookup.
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                          map_op, mux_a, mux_b);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands: the "greater" (unpack, mux) key on the a side selects
         * the FMAX/FADDNF variant.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them: the STVPM* variants share an
         * opcode and are selected by the waddr field.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Decode the per-op pack/unpack modes that live in the low opcode
         * bits (and, for the FFLOOR group, in mux_b).
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                /* VFPACK has no output pack; the others carry it in
                 * opcode bits 5:4.
                 */
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                /* Operand unpacks: a in opcode bits 3:2, b in bits 1:0. */
                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* Single-source float ops: output pack is carried in the
                 * (otherwise unused) mux_b field.
                 */
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-integer conversions never pack their output. */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* Vector-float16 ops: a 3-bit float16 unpack in the low
                 * opcode bits; no output pack or b unpack.
                 */
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        /* The MA bit either selects the magic-register write, or, for the
         * LDVPM*_IN ops, the *_OUT variant instead.
         */
        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
893 
/*
 * Decodes the mul-ALU slot of a 64-bit packed instruction into
 * instr->alu.mul.
 *
 * Returns false if the opcode/mux combination is not a valid mul op for
 * this device version, or if a pack/unpack field is unencodable.
 */
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo, mul_ops,
                                                  ARRAY_SIZE(mul_ops),
                                                  op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* Output pack is stored biased by 1 in opcode bits 5:4
                 * (the pack side relies on desc->opcode_first already
                 * carrying the +1).
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* Output pack is split: high bit in opcode bit 0, low bit
                 * in mux_b bit 2; a_unpack lives in mux_b bits 1:0.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* The float16 unpack is stored biased by 4 (mod 8) in the
                 * low opcode bits; undo the bias before decoding.
                 */
                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
966 
967 static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint8_t op)968 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
969                          const struct opcode_desc *opcodes, size_t num_opcodes,
970                          uint8_t op)
971 {
972         for (int i = 0; i < num_opcodes; i++) {
973                 const struct opcode_desc *op_desc = &opcodes[i];
974 
975                 if (op_desc->op != op)
976                         continue;
977 
978                 if (opcode_invalid_in_version(devinfo, op_desc))
979                         continue;
980 
981                 return op_desc;
982         }
983 
984         return NULL;
985 }
986 
/*
 * Encodes instr->alu.add into the add-ALU slot of the 64-bit packed
 * instruction, OR-ing the fields into *packed_instr.
 *
 * Returns false if the op is unknown for this device version or if any
 * pack/unpack mode has no hardware encoding for this op.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        /* The STVPM* variants share an opcode and are distinguished by a
         * fixed waddr value, which must not be treated as a magic write.
         */
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        /* The *_OUT LDVPM variants are encoded as *_IN plus the MA bit
         * (mirrors the decode in v3d_qpu_add_unpack()).
         */
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* Output pack goes in opcode bits 5:4, operand unpacks in
                 * bits 3:2 (a) and 1:0 (b).
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in:
                 * swap the operands when the requested op demands the
                 * opposite ordering (mirrors v3d_qpu_add_unpack()).
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* ABS has no VFPACK encoding. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Clear then set the single unpack bit for each operand
                 * (opcode_first may carry a 1 in these bits).
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Single-source float ops carry the output pack in the
                 * otherwise-unused mux_b field.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* An encoding of 0 (no unpack) is not valid here. */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* NOTE: this declaration is also in scope for the VFMIN/
                 * VFMAX case below (same switch block).
                 */
                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* An encoding of 0 (no unpack) is not valid here. */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Ops with no pack/unpack support must request none. */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
1199 
/*
 * Encodes instr->alu.mul into the mul-ALU slot of the 64-bit packed
 * instruction, OR-ing the fields into *packed_instr.
 *
 * Returns false if the op is unknown for this device version or if any
 * pack/unpack mode has no hardware encoding for this op.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                /* Operand unpacks in opcode bits 3:2 (a) and 1:0 (b). */
                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* Output pack is split across opcode bit 0 (high bit) and
                 * mux_b bit 2 (low bit); a_unpack goes in mux_b bits 1:0
                 * (mirrors the decode in v3d_qpu_mul_unpack()).
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* The float16 unpack is stored biased by 4 (mod 8) in the
                 * low opcode bits; SWAP_16 has its own dedicated opcode.
                 */
                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
1306 
/*
 * Decodes an ALU instruction: signal, condition/flags, read addresses, and
 * both the add and mul slots.
 *
 * Returns false on any undecodable field.
 */
static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        /* When the signal writes a register, the COND field is repurposed
         * to hold the signal's destination address and magic bit, and no
         * condition/flag updates are possible.
         */
        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}
1346 
/*
 * Decodes a packed branch instruction into instr->branch.
 *
 * Returns false for out-of-range condition values or the rejected
 * msfign encoding (3).
 */
static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        /* Condition 0 is "always"; values from 2 up map onto the
         * A0..ALLNA range (mirrored by v3d_qpu_instr_pack_branch()).
         */
        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        /* bdu is only filled in when the UB bit is set; it is left
         * unwritten otherwise.
         */
        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        /* The branch offset is split across two fields: ADDR_LOW holds
         * bits 23:3 and ADDR_HIGH holds bits 31:24.
         */
        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}
1391 
1392 bool
v3d_qpu_instr_unpack(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1393 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1394                      uint64_t packed_instr,
1395                      struct v3d_qpu_instr *instr)
1396 {
1397         if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
1398                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1399         } else {
1400                 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
1401 
1402                 if ((sig & 24) == 16) {
1403                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1404                                                            instr);
1405                 } else {
1406                         return false;
1407                 }
1408         }
1409 }
1410 
/*
 * Encodes an ALU (or signal-only) instruction: signal, read addresses,
 * add and mul slots, and the COND field (either condition/flags or the
 * signal's write address).
 *
 * Returns false on any unencodable field, or if a non-ALU instruction
 * carries an address-writing signal.
 */
static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        /* When the signal writes a register, the COND field
                         * is repurposed for the signal's destination, so no
                         * condition or flag updates may be requested.
                         */
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}
1457 
1458 static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1459 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1460                           const struct v3d_qpu_instr *instr,
1461                           uint64_t *packed_instr)
1462 {
1463         *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
1464 
1465         if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1466                 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1467                                                     V3D_QPU_BRANCH_COND_A0),
1468                                                V3D_QPU_BRANCH_COND);
1469         }
1470 
1471         *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1472                                        V3D_QPU_BRANCH_MSFIGN);
1473 
1474         *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1475                                        V3D_QPU_BRANCH_BDI);
1476 
1477         if (instr->branch.ub) {
1478                 *packed_instr |= V3D_QPU_BRANCH_UB;
1479                 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1480                                                V3D_QPU_BRANCH_BDU);
1481         }
1482 
1483         switch (instr->branch.bdi) {
1484         case V3D_QPU_BRANCH_DEST_ABS:
1485         case V3D_QPU_BRANCH_DEST_REL:
1486                 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1487                                                V3D_QPU_BRANCH_MSFIGN);
1488 
1489                 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1490                                                 ~0xff000000) >> 3,
1491                                                V3D_QPU_BRANCH_ADDR_LOW);
1492 
1493                 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1494                                                V3D_QPU_BRANCH_ADDR_HIGH);
1495                 break;
1496         default:
1497                 break;
1498         }
1499 
1500         if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
1501             instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
1502                 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1503                                                V3D_QPU_RADDR_A);
1504         }
1505 
1506         return true;
1507 }
1508 
1509 bool
v3d_qpu_instr_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1510 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1511                    const struct v3d_qpu_instr *instr,
1512                    uint64_t *packed_instr)
1513 {
1514         *packed_instr = 0;
1515 
1516         switch (instr->type) {
1517         case V3D_QPU_INSTR_TYPE_ALU:
1518                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1519         case V3D_QPU_INSTR_TYPE_BRANCH:
1520                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1521         default:
1522                 return false;
1523         }
1524 }
1525