/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}
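
/* The write and read paths hand out indices in lockstep: the i-th call to
 * write_add_object() during serialization corresponds to the i-th call to
 * read_add_object() during deserialization, so a bare uint32 index in the
 * blob is enough to name any previously seen object.
 */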

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
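
/* The resulting 3-bit mapping, for reference:
 *   bit_size: 0  1  2  4  8  16  32  64
 *   encoded:  0  1  2  3  4   5   6   7
 * decode_bit_size_3bits() inverts the nonzero cases via 1 << (encoded - 1).
 */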

#define NUM_COMPONENTS_IS_SEPARATE_7   7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
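
/* Summary of the 3-bit component-count encoding: 0-4 encode as themselves,
 * 8 encodes as 5 and 16 as 6. Any other count (e.g. a 5-component vector)
 * gets the sentinel NUM_COMPONENTS_IS_SEPARATE_7 and is written as a full
 * uint32 after the instruction header.
 */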

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
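
/* Example of the diff encoding below: two consecutive shader inputs whose
 * data differ only in their locations serialize the second variable's data
 * as a single packed_var_data_diff dword (e.g. location = 1 plus the
 * corresponding driver_location delta) instead of re-emitting the whole
 * nir_variable_data struct.
 */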

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      write_lookup_object(ctx, var->pointer_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
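
/* All three views above share the same 22-bit header (is_ssa, is_indirect,
 * object_idx); only the 10 footer bits are reinterpreted by the consumer:
 * ALU sources keep negate/abs and four 2-bit swizzles there, and texture
 * sources keep their nir_tex_src_type.
 */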

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
      uint8_t _pad:1;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
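
/* Example: an intrinsic with two constant indices that each fit in 4 bits
 * satisfies max_bits * num_indices <= 9 in write_intrinsic() below, so both
 * are packed into packed_const_indices with bit_size = 9 / 2 = 4, at bit
 * offsets 0 and 4, and no extra dwords are written.
 */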

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
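
/* Example: the common constant 1.0f is 0x3f800000, whose low 13 bits are
 * zero, so it packs as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13. Likewise a small integer such as -1
 * survives the 19-bit sign-extension round trip and packs as
 * load_const_scalar_lo_19bits_sext. Either way the whole load_const fits
 * in its one header dword.
 */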

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:14; /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:1;  /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
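
/* Note: write_dest() and read_dest() below always transfer the destination
 * through header.any.dest, i.e. the top byte of the dword, so every
 * per-type view whose instruction carries a destination must leave that
 * byte available for it.
 */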

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check whether the current ALU instruction has the same header as the
    * previous ALU instruction. If so, we don't have to write the current
    * header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset,
                sizeof(last_header));

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            memcpy(ctx->blob->data + ctx->last_alu_header_offset,
                   &last_header, sizeof(last_header));

            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
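
/* Example of the header-sharing path above: after scalarization, four
 * consecutive scalar ALU instructions with identical headers (say four
 * fmuls of the same size) emit one header whose
 * num_followup_alu_sharing_header counts up to 3, so the last three
 * instructions skip their 4-byte headers entirely.
 */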

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}
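
/* When this returns true, write_alu() below can store each SSA source as a
 * bare uint16 object index: with no abs/negate modifiers and identity
 * swizzles (except src0.x/src1.x, whose swizzles travel in
 * writemask_or_two_swizzles), the packed_src footer carries no information,
 * and 16-bit object IDs make the index itself fit in half a dword.
 */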

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->modes < (1 << 14));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = deref->modes;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = header.deref.modes;
   } else {
      assert(deref->parent.is_ssa);
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into 9 bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics have num_components equal to that of the dest or
    * of whichever src is listed with 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_9bit_all_combined: {
         unsigned bit_size = 9 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned is_sparse:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned array_is_lowered_cube:1;
      unsigned unused:6; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.is_sparse = tex->is_sparse,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->is_sparse = packed.u.is_sparse;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}
1532 
1533 static void
write_phi(write_ctx * ctx,const nir_phi_instr * phi)1534 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1535 {
1536    union packed_instr header;
1537    header.u32 = 0;
1538 
1539    header.phi.instr_type = phi->instr.type;
1540    header.phi.num_srcs = exec_list_length(&phi->srcs);
1541 
1542    /* Phi nodes are special, since they may reference SSA definitions and
1543     * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1544     * and then store enough information so that a later fixup pass can fill
1545     * them in correctly.
1546     */
1547    write_dest(ctx, &phi->dest, header, phi->instr.type);
1548 
1549    nir_foreach_phi_src(src, phi) {
1550       assert(src->src.is_ssa);
1551       size_t blob_offset = blob_reserve_uint32(ctx->blob);
1552       ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1553       assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1554       write_phi_fixup fixup = {
1555          .blob_offset = blob_offset,
1556          .src = src->src.ssa,
1557          .block = src->pred,
1558       };
1559       util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1560    }
1561 }
1562 
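/* Patch the uint32_t pairs reserved by write_phi.  The fixups record blob
 * offsets rather than pointers because the blob's backing storage may be
 * reallocated as it grows; by the time this runs, every SSA def and block
 * in the impl has been assigned a remap-table index, so the lookups below
 * cannot fail.
 */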
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually.  It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources.  We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);
   }

   return phi;
}

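/* Second pass over the phi sources stashed by read_phi: replace the raw
 * blob indices smuggled through the pred and ssa pointers with the real
 * deserialized objects, and move each source onto its def's use list.
 */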
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* These aren't handled because they require special block linking */
   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);

   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   /* These aren't handled because they require special block linking */
   assert(header.jump.type != nir_jump_goto &&
          header.jump.type != nir_jump_goto_if);

   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

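/* Most instruction types embed instr_type in their packed_instr header,
 * which their write_* helper emits (usually via write_dest).  Call
 * instructions have no packed header, so the type is written out as a
 * plain uint32 before the body.
 */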
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us.  Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

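/* Resetting last_instr_type and last_alu_header_offset here keeps the
 * ALU-header-sharing optimization from matching across block boundaries:
 * read_instr above inserts all followup ALU instructions that share a
 * header into the same block, so the writer must never share one across
 * blocks.
 */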
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block.  Just use the one from the tail of
    * the list.  NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR, so it should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);
   blob_write_uint8(ctx->blob, nif->control);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);
   nif->control = blob_read_uint8(ctx->blob);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   blob_write_uint8(ctx->blob, loop->control);
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   loop->control = blob_read_uint8(ctx->blob);
   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

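/* Phi fixups are flushed at the end of each impl: SSA defs and blocks only
 * receive remap-table indices while their impl's CF list is being written,
 * and a phi can only reference defs and blocks from its own impl.
 */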
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);

   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   fi->structured = blob_read_uint8(ctx->blob);

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

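/* Function flags, shared with read_function below:
 *    0x1 - is_entrypoint
 *    0x2 - a name string follows the flags word
 *    0x4 - the function has a nir_function_impl (serialized later)
 */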
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function, and calls get processed as we write the function_impls.  So
    * we stop here and write the function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
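/* Typical caller pattern (a sketch; where the bytes go afterwards, e.g. a
 * disk cache, is up to the caller):
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *    ... consume blob.data / blob.size ...
 *    blob_finish(&blob);
 */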
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

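/* Deserialization mirrors nir_serialize.  Instead of a hash table, reading
 * uses a flat idx_table indexed by the object IDs handed out during
 * writing, sized from the count stored at the front of the blob.  A sketch
 * of reading back a blob produced above (assuming a NULL mem_ctx is
 * acceptable to the caller):
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, blob.data, blob.size);
 *    nir_shader *nir = nir_deserialize(NULL, options, &reader);
 */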
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   nir_validate_shader(ctx.nir, "after deserialize");

   return ctx.nir;
}

void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader itself alone */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}