• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_serialize.h"
25 #include "util/u_dynarray.h"
26 #include "util/u_math.h"
27 #include "nir_control_flow.h"
28 #include "nir_xfb_info.h"
29 
/* Sentinel pointer value (the integer 1 cast to a pointer). Its users are
 * outside this part of the file.
 * NOTE(review): presumably marks a function that has an implementation --
 * confirm against the function (de)serialization code.
 */
#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
/* Limit on object indices: write_add_object() asserts that a newly assigned
 * index is not equal to this value. It equals 2^20, matching the 20-bit
 * object_idx field of union packed_src.
 */
#define MAX_OBJECT_IDS              (1 << 20)
32 
/* One phi source whose encoding is deferred. write_ctx::phi_fixups holds an
 * array of these, to be resolved in a second pass.
 */
typedef struct {
   /* NOTE(review): presumably the blob position to patch later -- confirm
    * against the phi writing code, which is not visible here.
    */
   size_t blob_offset;
   nir_def *src;
   nir_block *block;
} write_phi_fixup;
38 
/* State carried by the write_*() helpers while serializing a shader. */
typedef struct {
   const nir_shader *nir;

   /* Destination of all blob_write_*() calls. */
   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   /* Data of the last variable written with a full or location-diff
    * encoding; used by write_variable() to compute location differences.
    */
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;
   uint32_t last_alu_header;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;
68 
/* State carried by the read_*() helpers while deserializing a shader. */
typedef struct {
   /* Shader being built; also used as the ralloc parent for new variables. */
   nir_shader *nir;

   /* Source of all blob_read_*() / blob_copy_bytes() calls. */
   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   /* Data of the last variable read with a full or location-diff encoding;
    * the base for the next location-diff encoded variable.
    */
   struct nir_variable_data last_var_data;
} read_ctx;
91 
92 static void
write_add_object(write_ctx * ctx,const void * obj)93 write_add_object(write_ctx *ctx, const void *obj)
94 {
95    uint32_t index = ctx->next_idx++;
96    assert(index != MAX_OBJECT_IDS);
97    _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t)index);
98 }
99 
100 static uint32_t
write_lookup_object(write_ctx * ctx,const void * obj)101 write_lookup_object(write_ctx *ctx, const void *obj)
102 {
103    struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
104    assert(entry);
105    return (uint32_t)(uintptr_t)entry->data;
106 }
107 
108 static void
read_add_object(read_ctx * ctx,void * obj)109 read_add_object(read_ctx *ctx, void *obj)
110 {
111    assert(ctx->next_idx < ctx->idx_table_len);
112    ctx->idx_table[ctx->next_idx++] = obj;
113 }
114 
115 static void *
read_lookup_object(read_ctx * ctx,uint32_t idx)116 read_lookup_object(read_ctx *ctx, uint32_t idx)
117 {
118    assert(idx < ctx->idx_table_len);
119    return ctx->idx_table[idx];
120 }
121 
122 static void *
read_object(read_ctx * ctx)123 read_object(read_ctx *ctx)
124 {
125    return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126 }
127 
/* Map a bit size of 0, 1, 2, 4, 8, 16, 32 or 64 to a 3-bit code:
 * 0 stays 0, and a power of two 2^k becomes k + 1.
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));

   return bit_size ? util_logbase2(bit_size) + 1 : 0;
}
137 
/* Inverse of encode_bit_size_3bits(): code 0 means bit size 0, and any other
 * code c means bit size 2^(c - 1).
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size == 0)
      return 0;

   return 1 << (bit_size - 1);
}
145 
#define NUM_COMPONENTS_IS_SEPARATE_7 7

/* Compress a component count into 3 bits: 0..4 are stored as-is, 8 and 16
 * get the codes 5 and 6, and every other count yields the escape code
 * NUM_COMPONENTS_IS_SEPARATE_7 (the real count then follows as a uint32).
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   switch (num_components) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return num_components;
   case 8:
      return 5;
   case 16:
      return 6;
   default:
      /* special value indicating that num_components is in the next uint32 */
      return NUM_COMPONENTS_IS_SEPARATE_7;
   }
}
161 
/* Expand a 3-bit component-count code: 0..4 map to themselves, 5 to 8 and
 * 6 to 16. Any other code is not expected here.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   switch (value) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return value;
   case 5:
      return 8;
   case 6:
      return 16;
   default:
      break;
   }

   unreachable("invalid num_components encoding");
   return 0;
}
175 
176 static void
write_constant(write_ctx * ctx,const nir_constant * c)177 write_constant(write_ctx *ctx, const nir_constant *c)
178 {
179    blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
180    blob_write_uint32(ctx->blob, c->num_elements);
181    for (unsigned i = 0; i < c->num_elements; i++)
182       write_constant(ctx, c->elements[i]);
183 }
184 
185 static nir_constant *
read_constant(read_ctx * ctx,nir_variable * nvar)186 read_constant(read_ctx *ctx, nir_variable *nvar)
187 {
188    nir_constant *c = ralloc(nvar, nir_constant);
189 
190    static const nir_const_value zero_vals[ARRAY_SIZE(c->values)] = { 0 };
191    blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
192    c->is_null_constant = memcmp(c->values, zero_vals, sizeof(c->values)) == 0;
193    c->num_elements = blob_read_uint32(ctx->blob);
194    c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
195    for (unsigned i = 0; i < c->num_elements; i++) {
196       c->elements[i] = read_constant(ctx, nvar);
197       c->is_null_constant &= c->elements[i]->is_null_constant;
198    }
199 
200    return c;
201 }
202 
/* How nir_variable::data is stored for a variable; kept in the 2-bit
 * data_encoding field of union packed_var.
 */
enum var_data_encoding {
   /* The whole nir_variable_data struct is written as raw bytes. */
   var_encode_full,
   /* Mode is nir_var_shader_temp; no variable data is written. */
   var_encode_shader_temp,
   /* Mode is nir_var_function_temp; no variable data is written. */
   var_encode_function_temp,
   /* Only the location differences from the last variable are written,
    * packed in union packed_var_data_diff.
    */
   var_encode_location_diff,
};
209 
/* 32-bit header written first for every variable by write_variable() and
 * read back by read_variable(). The bit-field order is part of the
 * serialized format.
 */
union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name : 1;
      unsigned has_constant_initializer : 1;
      unsigned has_pointer_initializer : 1;
      unsigned has_interface_type : 1;
      unsigned num_state_slots : 7;
      unsigned data_encoding : 2; /* enum var_data_encoding */
      unsigned type_same_as_last : 1;
      unsigned interface_type_same_as_last : 1;
      unsigned ray_query : 1;
      unsigned num_members : 16;
   } u;
};
225 
/* Signed differences of a variable's location fields from those of the last
 * serialized variable; written as one uint32 when the data encoding is
 * var_encode_location_diff.
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location : 13;
      int location_frac : 3;
      int driver_location : 16;
   } u;
};
234 
/* Serialize one variable and register it as an object.
 *
 * Layout written to the blob, in order: the packed_var header; the type
 * (unless equal to the last written type); the interface type (if present
 * and different from the last one); the name (if present and not
 * stripping); the variable data (full bytes or a packed location diff, and
 * nothing for temporaries); the state slots; the constant initializer; the
 * pointer initializer's object index; and the raw member data.
 */
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   /* num_state_slots is stored in a 7-bit field of the header. */
   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   /* Work on a copy so that stripping does not modify the variable. */
   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      /* Copy of the data with the three location fields replaced by the last
       * variable's, so the memcmp below checks that everything else matches.
       */
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable. The differences must fit the signed 13-bit and 16-bit
       * fields of union packed_var_data_diff.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   /* ray_query is carried in the header, so it survives even when the
    * variable data itself is not written.
    */
   flags.u.ray_query = var->data.ray_query;

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      /* The (possibly stripped) data becomes the base for the next diff. */
      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *)var->members,
                       var->num_members * sizeof(*var->members));
   }
}
344 
345 static nir_variable *
read_variable(read_ctx * ctx)346 read_variable(read_ctx *ctx)
347 {
348    nir_variable *var = rzalloc(ctx->nir, nir_variable);
349    read_add_object(ctx, var);
350 
351    union packed_var flags;
352    flags.u32 = blob_read_uint32(ctx->blob);
353 
354    if (flags.u.type_same_as_last) {
355       var->type = ctx->last_type;
356    } else {
357       var->type = decode_type_from_blob(ctx->blob);
358       ctx->last_type = var->type;
359    }
360 
361    if (flags.u.has_interface_type) {
362       if (flags.u.interface_type_same_as_last) {
363          var->interface_type = ctx->last_interface_type;
364       } else {
365          var->interface_type = decode_type_from_blob(ctx->blob);
366          ctx->last_interface_type = var->interface_type;
367       }
368    }
369 
370    if (flags.u.has_name) {
371       const char *name = blob_read_string(ctx->blob);
372       var->name = ralloc_strdup(var, name);
373    } else {
374       var->name = NULL;
375    }
376 
377    if (flags.u.data_encoding == var_encode_shader_temp)
378       var->data.mode = nir_var_shader_temp;
379    else if (flags.u.data_encoding == var_encode_function_temp)
380       var->data.mode = nir_var_function_temp;
381    else if (flags.u.data_encoding == var_encode_full) {
382       blob_copy_bytes(ctx->blob, (uint8_t *)&var->data, sizeof(var->data));
383       ctx->last_var_data = var->data;
384    } else { /* var_encode_location_diff */
385       union packed_var_data_diff diff;
386       diff.u32 = blob_read_uint32(ctx->blob);
387 
388       var->data = ctx->last_var_data;
389       var->data.location += diff.u.location;
390       var->data.location_frac += diff.u.location_frac;
391       var->data.driver_location += diff.u.driver_location;
392 
393       ctx->last_var_data = var->data;
394    }
395 
396    var->data.ray_query = flags.u.ray_query;
397 
398    var->num_state_slots = flags.u.num_state_slots;
399    if (var->num_state_slots != 0) {
400       var->state_slots = ralloc_array(var, nir_state_slot,
401                                       var->num_state_slots);
402       for (unsigned i = 0; i < var->num_state_slots; i++) {
403          blob_copy_bytes(ctx->blob, &var->state_slots[i],
404                          sizeof(var->state_slots[i]));
405       }
406    }
407    if (flags.u.has_constant_initializer)
408       var->constant_initializer = read_constant(ctx, var);
409    else
410       var->constant_initializer = NULL;
411 
412    if (flags.u.has_pointer_initializer)
413       var->pointer_initializer = read_object(ctx);
414    else
415       var->pointer_initializer = NULL;
416 
417    var->num_members = flags.u.num_members;
418    if (var->num_members > 0) {
419       var->members = ralloc_array(var, struct nir_variable_data,
420                                   var->num_members);
421       blob_copy_bytes(ctx->blob, (uint8_t *)var->members,
422                       var->num_members * sizeof(*var->members));
423    }
424 
425    return var;
426 }
427 
428 static void
write_var_list(write_ctx * ctx,const struct exec_list * src)429 write_var_list(write_ctx *ctx, const struct exec_list *src)
430 {
431    blob_write_uint32(ctx->blob, exec_list_length(src));
432    foreach_list_typed(nir_variable, var, node, src) {
433       write_variable(ctx, var);
434    }
435 }
436 
437 static void
read_var_list(read_ctx * ctx,struct exec_list * dst)438 read_var_list(read_ctx *ctx, struct exec_list *dst)
439 {
440    exec_list_make_empty(dst);
441    unsigned num_vars = blob_read_uint32(ctx->blob);
442    for (unsigned i = 0; i < num_vars; i++) {
443       nir_variable *var = read_variable(ctx);
444       exec_list_push_tail(dst, &var->node);
445    }
446 }
447 
/* 32-bit encoding of an instruction source. All variants share the low
 * header bits holding the object index of the source's SSA def
 * (see write_src_full() / read_src()); the remaining footer bits are
 * interpreted per instruction kind. The bit-field order is part of the
 * serialized format.
 */
union packed_src {
   uint32_t u32;
   /* View used to access the object index regardless of instruction kind. */
   struct {
      unsigned _pad : 2; /* <-- Header */
      unsigned object_idx : 20;
      unsigned _footer : 10; /* <-- Footer */
   } any;
   /* ALU sources: 2-bit swizzles for the first four channels. */
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned _pad : 2;     /* <-- Footer */
      unsigned swizzle_x : 2;
      unsigned swizzle_y : 2;
      unsigned swizzle_z : 2;
      unsigned swizzle_w : 2;
   } alu;
   /* Texture sources; the users are outside this part of the file. */
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned src_type : 5; /* <-- Footer */
      unsigned _pad : 5;
   } tex;
};
469 
470 static void
write_src_full(write_ctx * ctx,const nir_src * src,union packed_src header)471 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
472 {
473    header.any.object_idx = write_lookup_object(ctx, src->ssa);
474    blob_write_uint32(ctx->blob, header.u32);
475 }
476 
477 static void
write_src(write_ctx * ctx,const nir_src * src)478 write_src(write_ctx *ctx, const nir_src *src)
479 {
480    union packed_src header = { 0 };
481    write_src_full(ctx, src, header);
482 }
483 
484 static union packed_src
read_src(read_ctx * ctx,nir_src * src)485 read_src(read_ctx *ctx, nir_src *src)
486 {
487    STATIC_ASSERT(sizeof(union packed_src) == 4);
488    union packed_src header;
489    header.u32 = blob_read_uint32(ctx->blob);
490 
491    src->ssa = read_lookup_object(ctx, header.any.object_idx);
492    return header;
493 }
494 
/* One-byte description of an SSA def; write_def() stores it in the `def`
 * field of union packed_instr and read_def() decodes it from there.
 */
union packed_def {
   uint8_t u8;
   struct {
      uint8_t _pad : 1;
      uint8_t num_components : 3; /* see encode_num_components_in_3bits() */
      uint8_t bit_size : 3;       /* see encode_bit_size_3bits() */
      uint8_t divergent : 1;
   };
};
504 
/* Selects how an intrinsic's constant indices are stored; kept in the 2-bit
 * const_indices_encoding field of union packed_instr. The code that uses
 * these values is outside this part of the file.
 */
enum intrinsic_const_indices_encoding {
   /* Use packed_const_indices to store tightly packed indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
517 
/* Selects how a load_const value is stored; kept in the 2-bit `packing`
 * field of the load_const variant of union packed_instr. The code that uses
 * these values is outside this part of the file.
 */
enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
530 
/* 32-bit instruction header, with one bit-field view per instruction kind.
 * Every view starts with the 4-bit instr_type. Views that carry an SSA def
 * declare an 8-bit `def` field (a union packed_def byte) as their last
 * member, so it can be accessed through the `any` view. The bit-field order
 * is part of the serialized format.
 */
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type : 4; /* always present */
      unsigned _pad : 20;
      unsigned def : 8; /* always last */
   } any;
   struct {
      unsigned instr_type : 4;
      unsigned exact : 1;
      unsigned no_signed_wrap : 1;
      unsigned no_unsigned_wrap : 1;
      unsigned padding : 1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles : 4;
      unsigned op : 9;
      unsigned packed_src_ssa_16bit : 1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header : 2;
      unsigned def : 8;
   } alu;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned cast_type_same_as_last : 1;
      unsigned modes : 5; /* See (de|en)code_deref_modes() */
      unsigned _pad : 9;
      unsigned in_bounds : 1;
      unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */
      unsigned def : 8;
   } deref;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned _pad : 1;
      unsigned object_idx : 16; /* if 0, the object ID is a separate uint32 */
      unsigned def : 8;
   } deref_var;
   struct {
      unsigned instr_type : 4;
      unsigned intrinsic : 10;
      unsigned const_indices_encoding : 2; /* enum intrinsic_const_indices_encoding */
      unsigned packed_const_indices : 8;
      unsigned def : 8;
   } intrinsic;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned packing : 2;       /* enum load_const_packing */
      unsigned packed_value : 19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned _pad : 21;
   } undef;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 4;
      unsigned op : 5;
      unsigned _pad : 11;
      unsigned def : 8;
   } tex;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 20;
      unsigned def : 8;
   } phi;
   struct {
      unsigned instr_type : 4;
      unsigned type : 2;
      unsigned _pad : 26;
   } jump;
};
607 
/* Write an instruction header together with the description of its SSA def,
 * then register the def as an object.
 *
 * The def's component count, bit size and divergence are packed into one
 * byte (union packed_def) and stored in header.any.def. If the component
 * count does not fit the 3-bit encoding, it follows the header as a separate
 * uint32.
 *
 * For ALU instructions the header may be omitted: when it equals the header
 * of the preceding ALU instruction, only a counter in that earlier header is
 * incremented instead.
 */
static void
write_def(write_ctx *ctx, const nir_def *def, union packed_instr header,
          nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_def) == 1);
   union packed_def pdef;
   pdef.u8 = 0;

   pdef.num_components =
      encode_num_components_in_3bits(def->num_components);
   pdef.bit_size = encode_bit_size_3bits(def->bit_size);
   pdef.divergent = def->divergent;
   header.any.def = pdef.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it is, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         last_header.u32 = ctx->last_alu_header;

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            /* Bump the counter in the already-written header in place. */
            last_header.alu.num_followup_alu_sharing_header++;
            blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
                                  last_header.u32);
            ctx->last_alu_header = last_header.u32;
            equal_header = true;
         }
      }

      if (!equal_header) {
         /* Remember where this header lives so that a following ALU
          * instruction with an equal header can update its counter.
          */
         ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
         blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
         ctx->last_alu_header = header.u32;
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, def->num_components);

   write_add_object(ctx, def);
}
667 
668 static void
read_def(read_ctx * ctx,nir_def * def,nir_instr * instr,union packed_instr header)669 read_def(read_ctx *ctx, nir_def *def, nir_instr *instr,
670          union packed_instr header)
671 {
672    union packed_def pdef;
673    pdef.u8 = header.any.def;
674 
675    unsigned bit_size = decode_bit_size_3bits(pdef.bit_size);
676    unsigned num_components;
677    if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
678       num_components = blob_read_uint32(ctx->blob);
679    else
680       num_components = decode_num_components_in_3bits(pdef.num_components);
681    nir_def_init(instr, def, num_components, bit_size);
682    def->divergent = pdef.divergent;
683    read_add_object(ctx, def);
684 }
685 
686 static bool
are_object_ids_16bit(write_ctx * ctx)687 are_object_ids_16bit(write_ctx *ctx)
688 {
689    /* Check the highest object ID, because they are monotonic. */
690    return ctx->next_idx < (1 << 16);
691 }
692 
693 static bool
is_alu_src_ssa_16bit(write_ctx * ctx,const nir_alu_instr * alu)694 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
695 {
696    unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
697 
698    for (unsigned i = 0; i < num_srcs; i++) {
699       unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
700 
701       for (unsigned chan = 0; chan < src_components; chan++) {
702          /* The swizzles for src0.x and src1.x are stored
703           * in writemask_or_two_swizzles for SSA ALUs.
704           */
705          if (i < 2 && chan == 0 && alu->src[i].swizzle[chan] < 4)
706             continue;
707 
708          if (alu->src[i].swizzle[chan] != chan)
709             return false;
710       }
711    }
712 
713    return are_object_ids_16bit(ctx);
714 }
715 
/* Serialize an ALU instruction.
 *
 * The header (flags, opcode and def) goes through write_def(), which may
 * fold it into the previous ALU header. The sources follow in one of two
 * forms:
 *  - packed: one 16-bit object index per source, with the first-channel
 *    swizzles of src0/src1 kept in the header;
 *  - full: one packed_src word per source, carrying 2-bit swizzles for up to
 *    4 channels, followed by extra uint32 words of 4-bit swizzles when the
 *    source has more than 4 components or channels.
 */
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   }

   write_def(ctx, &alu->def, header, alu->instr.type);

   if (header.alu.packed_src_ssa_16bit) {
      /* Packed form: just the 16-bit object index of each source. */
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         /* 2-bit swizzle fields can only address components 0..3. */
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            /* Eight 4-bit swizzles per uint32. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] << (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}
780 
781 static nir_alu_instr *
read_alu(read_ctx * ctx,union packed_instr header)782 read_alu(read_ctx *ctx, union packed_instr header)
783 {
784    unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
785    nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
786 
787    alu->exact = header.alu.exact;
788    alu->no_signed_wrap = header.alu.no_signed_wrap;
789    alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
790 
791    read_def(ctx, &alu->def, &alu->instr, header);
792 
793    if (header.alu.packed_src_ssa_16bit) {
794       for (unsigned i = 0; i < num_srcs; i++) {
795          nir_alu_src *src = &alu->src[i];
796          src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
797 
798          memset(&src->swizzle, 0, sizeof(src->swizzle));
799 
800          unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
801 
802          for (unsigned chan = 0; chan < src_components; chan++)
803             src->swizzle[chan] = chan;
804       }
805    } else {
806       for (unsigned i = 0; i < num_srcs; i++) {
807          union packed_src src = read_src(ctx, &alu->src[i].src);
808          unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
809          unsigned src_components = nir_src_num_components(alu->src[i].src);
810          bool packed = src_components <= 4 && src_channels <= 4;
811 
812          memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
813 
814          if (packed) {
815             alu->src[i].swizzle[0] = src.alu.swizzle_x;
816             alu->src[i].swizzle[1] = src.alu.swizzle_y;
817             alu->src[i].swizzle[2] = src.alu.swizzle_z;
818             alu->src[i].swizzle[3] = src.alu.swizzle_w;
819          } else {
820             /* Load swizzles for vec8 and vec16. */
821             for (unsigned o = 0; o < src_channels; o += 8) {
822                unsigned value = blob_read_uint32(ctx->blob);
823 
824                for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
825                   alu->src[i].swizzle[o + j] =
826                      (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
827                }
828             }
829          }
830       }
831    }
832 
833    if (header.alu.packed_src_ssa_16bit) {
834       alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
835       if (num_srcs > 1)
836          alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
837    }
838 
839    return alu;
840 }
841 
842 #define MODE_ENC_GENERIC_BIT (1 << 4)
843 
844 static nir_variable_mode
decode_deref_modes(unsigned modes)845 decode_deref_modes(unsigned modes)
846 {
847    if (modes & MODE_ENC_GENERIC_BIT) {
848       modes &= ~MODE_ENC_GENERIC_BIT;
849       return modes << (ffs(nir_var_mem_generic) - 1);
850    } else {
851       return 1 << modes;
852    }
853 }
854 
855 static unsigned
encode_deref_modes(nir_variable_mode modes)856 encode_deref_modes(nir_variable_mode modes)
857 {
858    /* Mode sets on derefs generally come in two forms.  For certain OpenCL
859     * cases, we can have more than one of the generic modes set.  In this
860     * case, we need the full bitfield.  Fortunately, there are only 4 of
861     * these.  For all other modes, we can only have one mode at a time so we
862     * can compress them by only storing the bit position.  This, plus one bit
863     * to select encoding, lets us pack the entire bitfield in 5 bits.
864     */
865    STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
866                  (1 << MODE_ENC_GENERIC_BIT));
867 
868    unsigned enc;
869    if (modes == 0 || (modes & nir_var_mem_generic)) {
870       assert(!(modes & ~nir_var_mem_generic));
871       enc = modes >> (ffs(nir_var_mem_generic) - 1);
872       assert(enc < MODE_ENC_GENERIC_BIT);
873       enc |= MODE_ENC_GENERIC_BIT;
874    } else {
875       assert(util_is_power_of_two_nonzero(modes));
876       enc = ffs(modes) - 1;
877       assert(enc < MODE_ENC_GENERIC_BIT);
878    }
879    assert(modes == decode_deref_modes(enc));
880    return enc;
881 }
882 
883 static void
write_deref(write_ctx * ctx,const nir_deref_instr * deref)884 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
885 {
886    assert(deref->deref_type < 8);
887 
888    union packed_instr header;
889    header.u32 = 0;
890 
891    header.deref.instr_type = deref->instr.type;
892    header.deref.deref_type = deref->deref_type;
893 
894    if (deref->deref_type == nir_deref_type_cast) {
895       header.deref.modes = encode_deref_modes(deref->modes);
896       header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
897    }
898 
899    unsigned var_idx = 0;
900    if (deref->deref_type == nir_deref_type_var) {
901       var_idx = write_lookup_object(ctx, deref->var);
902       if (var_idx && var_idx < (1 << 16))
903          header.deref_var.object_idx = var_idx;
904    }
905 
906    if (deref->deref_type == nir_deref_type_array ||
907        deref->deref_type == nir_deref_type_ptr_as_array) {
908       header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx);
909 
910       header.deref.in_bounds = deref->arr.in_bounds;
911    }
912 
913    write_def(ctx, &deref->def, header, deref->instr.type);
914 
915    switch (deref->deref_type) {
916    case nir_deref_type_var:
917       if (!header.deref_var.object_idx)
918          blob_write_uint32(ctx->blob, var_idx);
919       break;
920 
921    case nir_deref_type_struct:
922       write_src(ctx, &deref->parent);
923       blob_write_uint32(ctx->blob, deref->strct.index);
924       break;
925 
926    case nir_deref_type_array:
927    case nir_deref_type_ptr_as_array:
928       if (header.deref.packed_src_ssa_16bit) {
929          blob_write_uint16(ctx->blob,
930                            write_lookup_object(ctx, deref->parent.ssa));
931          blob_write_uint16(ctx->blob,
932                            write_lookup_object(ctx, deref->arr.index.ssa));
933       } else {
934          write_src(ctx, &deref->parent);
935          write_src(ctx, &deref->arr.index);
936       }
937       break;
938 
939    case nir_deref_type_cast:
940       write_src(ctx, &deref->parent);
941       blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
942       blob_write_uint32(ctx->blob, deref->cast.align_mul);
943       blob_write_uint32(ctx->blob, deref->cast.align_offset);
944       if (!header.deref.cast_type_same_as_last) {
945          encode_type_to_blob(ctx->blob, deref->type);
946          ctx->last_type = deref->type;
947       }
948       break;
949 
950    case nir_deref_type_array_wildcard:
951       write_src(ctx, &deref->parent);
952       break;
953 
954    default:
955       unreachable("Invalid deref type");
956    }
957 }
958 
959 static nir_deref_instr *
read_deref(read_ctx * ctx,union packed_instr header)960 read_deref(read_ctx *ctx, union packed_instr header)
961 {
962    nir_deref_type deref_type = header.deref.deref_type;
963    nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
964 
965    read_def(ctx, &deref->def, &deref->instr, header);
966 
967    nir_deref_instr *parent;
968 
969    switch (deref->deref_type) {
970    case nir_deref_type_var:
971       if (header.deref_var.object_idx)
972          deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
973       else
974          deref->var = read_object(ctx);
975 
976       deref->type = deref->var->type;
977       break;
978 
979    case nir_deref_type_struct:
980       read_src(ctx, &deref->parent);
981       parent = nir_src_as_deref(deref->parent);
982       deref->strct.index = blob_read_uint32(ctx->blob);
983       deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
984       break;
985 
986    case nir_deref_type_array:
987    case nir_deref_type_ptr_as_array:
988       if (header.deref.packed_src_ssa_16bit) {
989          deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
990          deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
991       } else {
992          read_src(ctx, &deref->parent);
993          read_src(ctx, &deref->arr.index);
994       }
995 
996       deref->arr.in_bounds = header.deref.in_bounds;
997 
998       parent = nir_src_as_deref(deref->parent);
999       if (deref->deref_type == nir_deref_type_array)
1000          deref->type = glsl_get_array_element(parent->type);
1001       else
1002          deref->type = parent->type;
1003       break;
1004 
1005    case nir_deref_type_cast:
1006       read_src(ctx, &deref->parent);
1007       deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1008       deref->cast.align_mul = blob_read_uint32(ctx->blob);
1009       deref->cast.align_offset = blob_read_uint32(ctx->blob);
1010       if (header.deref.cast_type_same_as_last) {
1011          deref->type = ctx->last_type;
1012       } else {
1013          deref->type = decode_type_from_blob(ctx->blob);
1014          ctx->last_type = deref->type;
1015       }
1016       break;
1017 
1018    case nir_deref_type_array_wildcard:
1019       read_src(ctx, &deref->parent);
1020       parent = nir_src_as_deref(deref->parent);
1021       deref->type = glsl_get_array_element(parent->type);
1022       break;
1023 
1024    default:
1025       unreachable("Invalid deref type");
1026    }
1027 
1028    if (deref_type == nir_deref_type_var) {
1029       deref->modes = deref->var->data.mode;
1030    } else if (deref->deref_type == nir_deref_type_cast) {
1031       deref->modes = decode_deref_modes(header.deref.modes);
1032    } else {
1033       deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
1034    }
1035 
1036    return deref;
1037 }
1038 
1039 static void
write_intrinsic(write_ctx * ctx,const nir_intrinsic_instr * intrin)1040 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1041 {
1042    /* 10 bits for nir_intrinsic_op */
1043    STATIC_ASSERT(nir_num_intrinsics <= 1024);
1044    unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1045    unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1046    assert(intrin->intrinsic < 1024);
1047 
1048    union packed_instr header;
1049    header.u32 = 0;
1050 
1051    header.intrinsic.instr_type = intrin->instr.type;
1052    header.intrinsic.intrinsic = intrin->intrinsic;
1053 
1054    /* Analyze constant indices to decide how to encode them. */
1055    if (num_indices) {
1056       unsigned max_bits = 0;
1057       for (unsigned i = 0; i < num_indices; i++) {
1058          unsigned max = util_last_bit(intrin->const_index[i]);
1059          max_bits = MAX2(max_bits, max);
1060       }
1061 
1062       if (max_bits * num_indices <= 8) {
1063          header.intrinsic.const_indices_encoding = const_indices_all_combined;
1064 
1065          /* Pack all const indices into 8 bits. */
1066          unsigned bit_size = 8 / num_indices;
1067          for (unsigned i = 0; i < num_indices; i++) {
1068             header.intrinsic.packed_const_indices |=
1069                intrin->const_index[i] << (i * bit_size);
1070          }
1071       } else if (max_bits <= 8)
1072          header.intrinsic.const_indices_encoding = const_indices_8bit;
1073       else if (max_bits <= 16)
1074          header.intrinsic.const_indices_encoding = const_indices_16bit;
1075       else
1076          header.intrinsic.const_indices_encoding = const_indices_32bit;
1077    }
1078 
1079    if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1080       write_def(ctx, &intrin->def, header, intrin->instr.type);
1081    else
1082       blob_write_uint32(ctx->blob, header.u32);
1083 
1084    for (unsigned i = 0; i < num_srcs; i++)
1085       write_src(ctx, &intrin->src[i]);
1086 
1087    if (num_indices) {
1088       switch (header.intrinsic.const_indices_encoding) {
1089       case const_indices_8bit:
1090          for (unsigned i = 0; i < num_indices; i++)
1091             blob_write_uint8(ctx->blob, intrin->const_index[i]);
1092          break;
1093       case const_indices_16bit:
1094          for (unsigned i = 0; i < num_indices; i++)
1095             blob_write_uint16(ctx->blob, intrin->const_index[i]);
1096          break;
1097       case const_indices_32bit:
1098          for (unsigned i = 0; i < num_indices; i++)
1099             blob_write_uint32(ctx->blob, intrin->const_index[i]);
1100          break;
1101       }
1102    }
1103 }
1104 
1105 static nir_intrinsic_instr *
read_intrinsic(read_ctx * ctx,union packed_instr header)1106 read_intrinsic(read_ctx *ctx, union packed_instr header)
1107 {
1108    nir_intrinsic_op op = header.intrinsic.intrinsic;
1109    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1110 
1111    unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1112    unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1113 
1114    if (nir_intrinsic_infos[op].has_dest)
1115       read_def(ctx, &intrin->def, &intrin->instr, header);
1116 
1117    for (unsigned i = 0; i < num_srcs; i++)
1118       read_src(ctx, &intrin->src[i]);
1119 
1120    /* Vectorized instrinsics have num_components same as dst or src that has
1121     * 0 components in the info. Find it.
1122     */
1123    if (nir_intrinsic_infos[op].has_dest &&
1124        nir_intrinsic_infos[op].dest_components == 0) {
1125       intrin->num_components = intrin->def.num_components;
1126    } else {
1127       for (unsigned i = 0; i < num_srcs; i++) {
1128          if (nir_intrinsic_infos[op].src_components[i] == 0) {
1129             intrin->num_components = nir_src_num_components(intrin->src[i]);
1130             break;
1131          }
1132       }
1133    }
1134 
1135    if (num_indices) {
1136       switch (header.intrinsic.const_indices_encoding) {
1137       case const_indices_all_combined: {
1138          unsigned bit_size = 8 / num_indices;
1139          unsigned bit_mask = u_bit_consecutive(0, bit_size);
1140          for (unsigned i = 0; i < num_indices; i++) {
1141             intrin->const_index[i] =
1142                (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1143                bit_mask;
1144          }
1145          break;
1146       }
1147       case const_indices_8bit:
1148          for (unsigned i = 0; i < num_indices; i++)
1149             intrin->const_index[i] = blob_read_uint8(ctx->blob);
1150          break;
1151       case const_indices_16bit:
1152          for (unsigned i = 0; i < num_indices; i++)
1153             intrin->const_index[i] = blob_read_uint16(ctx->blob);
1154          break;
1155       case const_indices_32bit:
1156          for (unsigned i = 0; i < num_indices; i++)
1157             intrin->const_index[i] = blob_read_uint32(ctx->blob);
1158          break;
1159       }
1160    }
1161 
1162    return intrin;
1163 }
1164 
1165 static void
write_load_const(write_ctx * ctx,const nir_load_const_instr * lc)1166 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1167 {
1168    assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1169    union packed_instr header;
1170    header.u32 = 0;
1171 
1172    header.load_const.instr_type = lc->instr.type;
1173    header.load_const.last_component = lc->def.num_components - 1;
1174    header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1175    header.load_const.packing = load_const_full;
1176 
1177    /* Try to pack 1-component constants into the 19 free bits in the header. */
1178    if (lc->def.num_components == 1) {
1179       switch (lc->def.bit_size) {
1180       case 64:
1181          if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1182             /* packed_value contains high 19 bits, low bits are 0 */
1183             header.load_const.packing = load_const_scalar_hi_19bits;
1184             header.load_const.packed_value = lc->value[0].u64 >> 45;
1185          } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
1186             /* packed_value contains low 19 bits, high bits are sign-extended */
1187             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1188             header.load_const.packed_value = lc->value[0].u64;
1189          }
1190          break;
1191 
1192       case 32:
1193          if ((lc->value[0].u32 & 0x1fff) == 0) {
1194             header.load_const.packing = load_const_scalar_hi_19bits;
1195             header.load_const.packed_value = lc->value[0].u32 >> 13;
1196          } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
1197             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1198             header.load_const.packed_value = lc->value[0].u32;
1199          }
1200          break;
1201 
1202       case 16:
1203          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1204          header.load_const.packed_value = lc->value[0].u16;
1205          break;
1206       case 8:
1207          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1208          header.load_const.packed_value = lc->value[0].u8;
1209          break;
1210       case 1:
1211          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1212          header.load_const.packed_value = lc->value[0].b;
1213          break;
1214       default:
1215          unreachable("invalid bit_size");
1216       }
1217    }
1218 
1219    blob_write_uint32(ctx->blob, header.u32);
1220 
1221    if (header.load_const.packing == load_const_full) {
1222       switch (lc->def.bit_size) {
1223       case 64:
1224          blob_write_bytes(ctx->blob, lc->value,
1225                           sizeof(*lc->value) * lc->def.num_components);
1226          break;
1227 
1228       case 32:
1229          for (unsigned i = 0; i < lc->def.num_components; i++)
1230             blob_write_uint32(ctx->blob, lc->value[i].u32);
1231          break;
1232 
1233       case 16:
1234          for (unsigned i = 0; i < lc->def.num_components; i++)
1235             blob_write_uint16(ctx->blob, lc->value[i].u16);
1236          break;
1237 
1238       default:
1239          assert(lc->def.bit_size <= 8);
1240          for (unsigned i = 0; i < lc->def.num_components; i++)
1241             blob_write_uint8(ctx->blob, lc->value[i].u8);
1242          break;
1243       }
1244    }
1245 
1246    write_add_object(ctx, &lc->def);
1247 }
1248 
1249 static nir_load_const_instr *
read_load_const(read_ctx * ctx,union packed_instr header)1250 read_load_const(read_ctx *ctx, union packed_instr header)
1251 {
1252    nir_load_const_instr *lc =
1253       nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1254                                   decode_bit_size_3bits(header.load_const.bit_size));
1255    lc->def.divergent = false;
1256 
1257    switch (header.load_const.packing) {
1258    case load_const_scalar_hi_19bits:
1259       switch (lc->def.bit_size) {
1260       case 64:
1261          lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1262          break;
1263       case 32:
1264          lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1265          break;
1266       default:
1267          unreachable("invalid bit_size");
1268       }
1269       break;
1270 
1271    case load_const_scalar_lo_19bits_sext:
1272       switch (lc->def.bit_size) {
1273       case 64:
1274          lc->value[0].u64 = header.load_const.packed_value;
1275          if (lc->value[0].u64 >> 18)
1276             lc->value[0].u64 |= UINT64_C(0xfffffffffff80000);
1277          break;
1278       case 32:
1279          lc->value[0].u32 = header.load_const.packed_value;
1280          if (lc->value[0].u32 >> 18)
1281             lc->value[0].u32 |= 0xfff80000;
1282          break;
1283       case 16:
1284          lc->value[0].u16 = header.load_const.packed_value;
1285          break;
1286       case 8:
1287          lc->value[0].u8 = header.load_const.packed_value;
1288          break;
1289       case 1:
1290          lc->value[0].b = header.load_const.packed_value;
1291          break;
1292       default:
1293          unreachable("invalid bit_size");
1294       }
1295       break;
1296 
1297    case load_const_full:
1298       switch (lc->def.bit_size) {
1299       case 64:
1300          blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1301          break;
1302 
1303       case 32:
1304          for (unsigned i = 0; i < lc->def.num_components; i++)
1305             lc->value[i].u32 = blob_read_uint32(ctx->blob);
1306          break;
1307 
1308       case 16:
1309          for (unsigned i = 0; i < lc->def.num_components; i++)
1310             lc->value[i].u16 = blob_read_uint16(ctx->blob);
1311          break;
1312 
1313       default:
1314          assert(lc->def.bit_size <= 8);
1315          for (unsigned i = 0; i < lc->def.num_components; i++)
1316             lc->value[i].u8 = blob_read_uint8(ctx->blob);
1317          break;
1318       }
1319       break;
1320    }
1321 
1322    read_add_object(ctx, &lc->def);
1323    return lc;
1324 }
1325 
1326 static void
write_ssa_undef(write_ctx * ctx,const nir_undef_instr * undef)1327 write_ssa_undef(write_ctx *ctx, const nir_undef_instr *undef)
1328 {
1329    assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1330 
1331    union packed_instr header;
1332    header.u32 = 0;
1333 
1334    header.undef.instr_type = undef->instr.type;
1335    header.undef.last_component = undef->def.num_components - 1;
1336    header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1337 
1338    blob_write_uint32(ctx->blob, header.u32);
1339    write_add_object(ctx, &undef->def);
1340 }
1341 
1342 static nir_undef_instr *
read_ssa_undef(read_ctx * ctx,union packed_instr header)1343 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1344 {
1345    nir_undef_instr *undef =
1346       nir_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1347                              decode_bit_size_3bits(header.undef.bit_size));
1348 
1349    undef->def.divergent = false;
1350 
1351    read_add_object(ctx, &undef->def);
1352    return undef;
1353 }
1354 
/* One 32-bit word holding the small per-texture-instruction fields.  It is
 * written and read as `u32`; the named bitfields overlay the same storage.
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim : 4;            /* nir_tex_instr::sampler_dim */
      unsigned dest_type : 8;              /* nir_tex_instr::dest_type */
      unsigned coord_components : 3;       /* nir_tex_instr::coord_components */
      unsigned is_array : 1;               /* nir_tex_instr::is_array */
      unsigned is_shadow : 1;              /* nir_tex_instr::is_shadow */
      unsigned is_new_style_shadow : 1;    /* nir_tex_instr::is_new_style_shadow */
      unsigned is_sparse : 1;              /* nir_tex_instr::is_sparse */
      unsigned component : 2;              /* nir_tex_instr::component */
      unsigned texture_non_uniform : 1;    /* nir_tex_instr::texture_non_uniform */
      unsigned sampler_non_uniform : 1;    /* nir_tex_instr::sampler_non_uniform */
      unsigned array_is_lowered_cube : 1;  /* nir_tex_instr::array_is_lowered_cube */
      unsigned is_gather_implicit_lod : 1; /* nir_tex_instr::is_gather_implicit_lod */
      unsigned unused : 5;                 /* Mark unused for valgrind. */
   } u;
};
1373 
1374 static void
write_tex(write_ctx * ctx,const nir_tex_instr * tex)1375 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1376 {
1377    assert(tex->num_srcs < 16);
1378    assert(tex->op < 32);
1379 
1380    union packed_instr header;
1381    header.u32 = 0;
1382 
1383    header.tex.instr_type = tex->instr.type;
1384    header.tex.num_srcs = tex->num_srcs;
1385    header.tex.op = tex->op;
1386 
1387    write_def(ctx, &tex->def, header, tex->instr.type);
1388 
1389    blob_write_uint32(ctx->blob, tex->texture_index);
1390    blob_write_uint32(ctx->blob, tex->sampler_index);
1391    blob_write_uint32(ctx->blob, tex->backend_flags);
1392    if (tex->op == nir_texop_tg4)
1393       blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1394 
1395    STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1396    union packed_tex_data packed = {
1397       .u.sampler_dim = tex->sampler_dim,
1398       .u.dest_type = tex->dest_type,
1399       .u.coord_components = tex->coord_components,
1400       .u.is_array = tex->is_array,
1401       .u.is_shadow = tex->is_shadow,
1402       .u.is_new_style_shadow = tex->is_new_style_shadow,
1403       .u.is_sparse = tex->is_sparse,
1404       .u.component = tex->component,
1405       .u.texture_non_uniform = tex->texture_non_uniform,
1406       .u.sampler_non_uniform = tex->sampler_non_uniform,
1407       .u.array_is_lowered_cube = tex->array_is_lowered_cube,
1408       .u.is_gather_implicit_lod = tex->is_gather_implicit_lod,
1409    };
1410    blob_write_uint32(ctx->blob, packed.u32);
1411 
1412    for (unsigned i = 0; i < tex->num_srcs; i++) {
1413       union packed_src src;
1414       src.u32 = 0;
1415       src.tex.src_type = tex->src[i].src_type;
1416       write_src_full(ctx, &tex->src[i].src, src);
1417    }
1418 }
1419 
1420 static nir_tex_instr *
read_tex(read_ctx * ctx,union packed_instr header)1421 read_tex(read_ctx *ctx, union packed_instr header)
1422 {
1423    nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1424 
1425    read_def(ctx, &tex->def, &tex->instr, header);
1426 
1427    tex->op = header.tex.op;
1428    tex->texture_index = blob_read_uint32(ctx->blob);
1429    tex->sampler_index = blob_read_uint32(ctx->blob);
1430    tex->backend_flags = blob_read_uint32(ctx->blob);
1431    if (tex->op == nir_texop_tg4)
1432       blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1433 
1434    union packed_tex_data packed;
1435    packed.u32 = blob_read_uint32(ctx->blob);
1436    tex->sampler_dim = packed.u.sampler_dim;
1437    tex->dest_type = packed.u.dest_type;
1438    tex->coord_components = packed.u.coord_components;
1439    tex->is_array = packed.u.is_array;
1440    tex->is_shadow = packed.u.is_shadow;
1441    tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1442    tex->is_sparse = packed.u.is_sparse;
1443    tex->component = packed.u.component;
1444    tex->texture_non_uniform = packed.u.texture_non_uniform;
1445    tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1446    tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
1447    tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod;
1448 
1449    for (unsigned i = 0; i < tex->num_srcs; i++) {
1450       union packed_src src = read_src(ctx, &tex->src[i].src);
1451       tex->src[i].src_type = src.tex.src_type;
1452    }
1453 
1454    return tex;
1455 }
1456 
1457 static void
write_phi(write_ctx * ctx,const nir_phi_instr * phi)1458 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1459 {
1460    union packed_instr header;
1461    header.u32 = 0;
1462 
1463    header.phi.instr_type = phi->instr.type;
1464    header.phi.num_srcs = exec_list_length(&phi->srcs);
1465 
1466    /* Phi nodes are special, since they may reference SSA definitions and
1467     * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1468     * and then store enough information so that a later fixup pass can fill
1469     * them in correctly.
1470     */
1471    write_def(ctx, &phi->def, header, phi->instr.type);
1472 
1473    nir_foreach_phi_src(src, phi) {
1474       size_t blob_offset = blob_reserve_uint32(ctx->blob);
1475       ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1476       assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1477       write_phi_fixup fixup = {
1478          .blob_offset = blob_offset,
1479          .src = src->src.ssa,
1480          .block = src->pred,
1481       };
1482       util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1483    }
1484 }
1485 
1486 static void
write_fixup_phis(write_ctx * ctx)1487 write_fixup_phis(write_ctx *ctx)
1488 {
1489    util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1490       blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
1491                             write_lookup_object(ctx, fixup->src));
1492       blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
1493                             write_lookup_object(ctx, fixup->block));
1494    }
1495 
1496    util_dynarray_clear(&ctx->phi_fixups);
1497 }
1498 
1499 static nir_phi_instr *
read_phi(read_ctx * ctx,nir_block * blk,union packed_instr header)1500 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1501 {
1502    nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1503 
1504    read_def(ctx, &phi->def, &phi->instr, header);
1505 
1506    /* For similar reasons as before, we just store the index directly into the
1507     * pointer, and let a later pass resolve the phi sources.
1508     *
1509     * In order to ensure that the copied sources (which are just the indices
1510     * from the blob for now) don't get inserted into the old shader's use-def
1511     * lists, we have to add the phi instruction *before* we set up its
1512     * sources.
1513     */
1514    nir_instr_insert_after_block(blk, &phi->instr);
1515 
1516    for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1517       nir_def *def = (nir_def *)(uintptr_t)blob_read_uint32(ctx->blob);
1518       nir_block *pred = (nir_block *)(uintptr_t)blob_read_uint32(ctx->blob);
1519       nir_phi_src *src = nir_phi_instr_add_src(phi, pred, def);
1520 
1521       /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1522        * we have to set the parent_instr manually.  It doesn't really matter
1523        * when we do it, so we might as well do it here.
1524        */
1525       nir_src_set_parent_instr(&src->src, &phi->instr);
1526 
1527       /* Stash it in the list of phi sources.  We'll walk this list and fix up
1528        * sources at the very end of read_function_impl.
1529        */
1530       list_add(&src->src.use_link, &ctx->phi_srcs);
1531    }
1532 
1533    return phi;
1534 }
1535 
1536 static void
read_fixup_phis(read_ctx * ctx)1537 read_fixup_phis(read_ctx *ctx)
1538 {
1539    list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1540       src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1541       src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1542 
1543       /* Remove from this list */
1544       list_del(&src->src.use_link);
1545 
1546       list_addtail(&src->src.use_link, &src->src.ssa->uses);
1547    }
1548    assert(list_is_empty(&ctx->phi_srcs));
1549 }
1550 
1551 static void
write_jump(write_ctx * ctx,const nir_jump_instr * jmp)1552 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1553 {
1554    /* These aren't handled because they require special block linking */
1555    assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1556 
1557    assert(jmp->type < 4);
1558 
1559    union packed_instr header;
1560    header.u32 = 0;
1561 
1562    header.jump.instr_type = jmp->instr.type;
1563    header.jump.type = jmp->type;
1564 
1565    blob_write_uint32(ctx->blob, header.u32);
1566 }
1567 
1568 static nir_jump_instr *
read_jump(read_ctx * ctx,union packed_instr header)1569 read_jump(read_ctx *ctx, union packed_instr header)
1570 {
1571    /* These aren't handled because they require special block linking */
1572    assert(header.jump.type != nir_jump_goto &&
1573           header.jump.type != nir_jump_goto_if);
1574 
1575    nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1576    return jmp;
1577 }
1578 
1579 static void
write_call(write_ctx * ctx,const nir_call_instr * call)1580 write_call(write_ctx *ctx, const nir_call_instr *call)
1581 {
1582    blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1583 
1584    for (unsigned i = 0; i < call->num_params; i++)
1585       write_src(ctx, &call->params[i]);
1586 }
1587 
1588 static nir_call_instr *
read_call(read_ctx * ctx)1589 read_call(read_ctx *ctx)
1590 {
1591    nir_function *callee = read_object(ctx);
1592    nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1593 
1594    for (unsigned i = 0; i < call->num_params; i++)
1595       read_src(ctx, &call->params[i]);
1596 
1597    return call;
1598 }
1599 
1600 static void
write_instr(write_ctx * ctx,const nir_instr * instr)1601 write_instr(write_ctx *ctx, const nir_instr *instr)
1602 {
1603    /* We have only 4 bits for the instruction type. */
1604    assert(instr->type < 16);
1605 
1606    switch (instr->type) {
1607    case nir_instr_type_alu:
1608       write_alu(ctx, nir_instr_as_alu(instr));
1609       break;
1610    case nir_instr_type_deref:
1611       write_deref(ctx, nir_instr_as_deref(instr));
1612       break;
1613    case nir_instr_type_intrinsic:
1614       write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1615       break;
1616    case nir_instr_type_load_const:
1617       write_load_const(ctx, nir_instr_as_load_const(instr));
1618       break;
1619    case nir_instr_type_undef:
1620       write_ssa_undef(ctx, nir_instr_as_undef(instr));
1621       break;
1622    case nir_instr_type_tex:
1623       write_tex(ctx, nir_instr_as_tex(instr));
1624       break;
1625    case nir_instr_type_phi:
1626       write_phi(ctx, nir_instr_as_phi(instr));
1627       break;
1628    case nir_instr_type_jump:
1629       write_jump(ctx, nir_instr_as_jump(instr));
1630       break;
1631    case nir_instr_type_call:
1632       blob_write_uint32(ctx->blob, instr->type);
1633       write_call(ctx, nir_instr_as_call(instr));
1634       break;
1635    case nir_instr_type_parallel_copy:
1636       unreachable("Cannot write parallel copies");
1637    default:
1638       unreachable("bad instr type");
1639    }
1640 }
1641 
1642 /* Return the number of instructions read. */
1643 static unsigned
read_instr(read_ctx * ctx,nir_block * block)1644 read_instr(read_ctx *ctx, nir_block *block)
1645 {
1646    STATIC_ASSERT(sizeof(union packed_instr) == 4);
1647    union packed_instr header;
1648    header.u32 = blob_read_uint32(ctx->blob);
1649    nir_instr *instr;
1650 
1651    switch (header.any.instr_type) {
1652    case nir_instr_type_alu:
1653       for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1654          nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1655       return header.alu.num_followup_alu_sharing_header + 1;
1656    case nir_instr_type_deref:
1657       instr = &read_deref(ctx, header)->instr;
1658       break;
1659    case nir_instr_type_intrinsic:
1660       instr = &read_intrinsic(ctx, header)->instr;
1661       break;
1662    case nir_instr_type_load_const:
1663       instr = &read_load_const(ctx, header)->instr;
1664       break;
1665    case nir_instr_type_undef:
1666       instr = &read_ssa_undef(ctx, header)->instr;
1667       break;
1668    case nir_instr_type_tex:
1669       instr = &read_tex(ctx, header)->instr;
1670       break;
1671    case nir_instr_type_phi:
1672       /* Phi instructions are a bit of a special case when reading because we
1673        * don't want inserting the instruction to automatically handle use/defs
1674        * for us.  Instead, we need to wait until all the blocks/instructions
1675        * are read so that we can set their sources up.
1676        */
1677       read_phi(ctx, block, header);
1678       return 1;
1679    case nir_instr_type_jump:
1680       instr = &read_jump(ctx, header)->instr;
1681       break;
1682    case nir_instr_type_call:
1683       instr = &read_call(ctx)->instr;
1684       break;
1685    case nir_instr_type_parallel_copy:
1686       unreachable("Cannot read parallel copies");
1687    default:
1688       unreachable("bad instr type");
1689    }
1690 
1691    nir_instr_insert_after_block(block, instr);
1692    return 1;
1693 }
1694 
1695 static void
write_block(write_ctx * ctx,const nir_block * block)1696 write_block(write_ctx *ctx, const nir_block *block)
1697 {
1698    write_add_object(ctx, block);
1699    blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1700 
1701    ctx->last_instr_type = ~0;
1702    ctx->last_alu_header_offset = 0;
1703 
1704    nir_foreach_instr(instr, block) {
1705       write_instr(ctx, instr);
1706       ctx->last_instr_type = instr->type;
1707    }
1708 }
1709 
1710 static void
read_block(read_ctx * ctx,struct exec_list * cf_list)1711 read_block(read_ctx *ctx, struct exec_list *cf_list)
1712 {
1713    /* Don't actually create a new block.  Just use the one from the tail of
1714     * the list.  NIR guarantees that the tail of the list is a block and that
1715     * no two blocks are side-by-side in the IR;  It should be empty.
1716     */
1717    nir_block *block =
1718       exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1719 
1720    read_add_object(ctx, block);
1721    unsigned num_instrs = blob_read_uint32(ctx->blob);
1722    for (unsigned i = 0; i < num_instrs;) {
1723       i += read_instr(ctx, block);
1724    }
1725 }
1726 
1727 static void
1728 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1729 
1730 static void
1731 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1732 
1733 static void
write_if(write_ctx * ctx,nir_if * nif)1734 write_if(write_ctx *ctx, nir_if *nif)
1735 {
1736    write_src(ctx, &nif->condition);
1737    blob_write_uint8(ctx->blob, nif->control);
1738 
1739    write_cf_list(ctx, &nif->then_list);
1740    write_cf_list(ctx, &nif->else_list);
1741 }
1742 
1743 static void
read_if(read_ctx * ctx,struct exec_list * cf_list)1744 read_if(read_ctx *ctx, struct exec_list *cf_list)
1745 {
1746    nir_if *nif = nir_if_create(ctx->nir);
1747 
1748    read_src(ctx, &nif->condition);
1749    nif->control = blob_read_uint8(ctx->blob);
1750 
1751    nir_cf_node_insert_end(cf_list, &nif->cf_node);
1752 
1753    read_cf_list(ctx, &nif->then_list);
1754    read_cf_list(ctx, &nif->else_list);
1755 }
1756 
1757 static void
write_loop(write_ctx * ctx,nir_loop * loop)1758 write_loop(write_ctx *ctx, nir_loop *loop)
1759 {
1760    blob_write_uint8(ctx->blob, loop->control);
1761    blob_write_uint8(ctx->blob, loop->divergent);
1762    bool has_continue_construct = nir_loop_has_continue_construct(loop);
1763    blob_write_uint8(ctx->blob, has_continue_construct);
1764 
1765    write_cf_list(ctx, &loop->body);
1766    if (has_continue_construct) {
1767       write_cf_list(ctx, &loop->continue_list);
1768    }
1769 }
1770 
1771 static void
read_loop(read_ctx * ctx,struct exec_list * cf_list)1772 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1773 {
1774    nir_loop *loop = nir_loop_create(ctx->nir);
1775 
1776    nir_cf_node_insert_end(cf_list, &loop->cf_node);
1777 
1778    loop->control = blob_read_uint8(ctx->blob);
1779    loop->divergent = blob_read_uint8(ctx->blob);
1780    bool has_continue_construct = blob_read_uint8(ctx->blob);
1781 
1782    read_cf_list(ctx, &loop->body);
1783    if (has_continue_construct) {
1784       nir_loop_add_continue_construct(loop);
1785       read_cf_list(ctx, &loop->continue_list);
1786    }
1787 }
1788 
1789 static void
write_cf_node(write_ctx * ctx,nir_cf_node * cf)1790 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1791 {
1792    blob_write_uint32(ctx->blob, cf->type);
1793 
1794    switch (cf->type) {
1795    case nir_cf_node_block:
1796       write_block(ctx, nir_cf_node_as_block(cf));
1797       break;
1798    case nir_cf_node_if:
1799       write_if(ctx, nir_cf_node_as_if(cf));
1800       break;
1801    case nir_cf_node_loop:
1802       write_loop(ctx, nir_cf_node_as_loop(cf));
1803       break;
1804    default:
1805       unreachable("bad cf type");
1806    }
1807 }
1808 
1809 static void
read_cf_node(read_ctx * ctx,struct exec_list * list)1810 read_cf_node(read_ctx *ctx, struct exec_list *list)
1811 {
1812    nir_cf_node_type type = blob_read_uint32(ctx->blob);
1813 
1814    switch (type) {
1815    case nir_cf_node_block:
1816       read_block(ctx, list);
1817       break;
1818    case nir_cf_node_if:
1819       read_if(ctx, list);
1820       break;
1821    case nir_cf_node_loop:
1822       read_loop(ctx, list);
1823       break;
1824    default:
1825       unreachable("bad cf type");
1826    }
1827 }
1828 
1829 static void
write_cf_list(write_ctx * ctx,const struct exec_list * cf_list)1830 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1831 {
1832    blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1833    foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1834       write_cf_node(ctx, cf);
1835    }
1836 }
1837 
1838 static void
read_cf_list(read_ctx * ctx,struct exec_list * cf_list)1839 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1840 {
1841    uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1842    for (unsigned i = 0; i < num_cf_nodes; i++)
1843       read_cf_node(ctx, cf_list);
1844 }
1845 
1846 static void
write_function_impl(write_ctx * ctx,const nir_function_impl * fi)1847 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1848 {
1849    blob_write_uint8(ctx->blob, fi->structured);
1850    blob_write_uint8(ctx->blob, !!fi->preamble);
1851 
1852    if (fi->preamble)
1853       blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));
1854 
1855    write_var_list(ctx, &fi->locals);
1856 
1857    write_cf_list(ctx, &fi->body);
1858    write_fixup_phis(ctx);
1859 }
1860 
1861 static nir_function_impl *
read_function_impl(read_ctx * ctx)1862 read_function_impl(read_ctx *ctx)
1863 {
1864    nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1865 
1866    fi->structured = blob_read_uint8(ctx->blob);
1867    bool preamble = blob_read_uint8(ctx->blob);
1868 
1869    if (preamble)
1870       fi->preamble = read_object(ctx);
1871 
1872    read_var_list(ctx, &fi->locals);
1873 
1874    read_cf_list(ctx, &fi->body);
1875    read_fixup_phis(ctx);
1876 
1877    fi->valid_metadata = 0;
1878 
1879    return fi;
1880 }
1881 
1882 static void
write_function(write_ctx * ctx,const nir_function * fxn)1883 write_function(write_ctx *ctx, const nir_function *fxn)
1884 {
1885    uint32_t flags = 0;
1886    if (fxn->is_entrypoint)
1887       flags |= 0x1;
1888    if (fxn->is_preamble)
1889       flags |= 0x2;
1890    if (fxn->name)
1891       flags |= 0x4;
1892    if (fxn->impl)
1893       flags |= 0x8;
1894    if (fxn->should_inline)
1895       flags |= 0x10;
1896    if (fxn->dont_inline)
1897       flags |= 0x20;
1898    blob_write_uint32(ctx->blob, flags);
1899    if (fxn->name)
1900       blob_write_string(ctx->blob, fxn->name);
1901 
1902    write_add_object(ctx, fxn);
1903 
1904    blob_write_uint32(ctx->blob, fxn->num_params);
1905    for (unsigned i = 0; i < fxn->num_params; i++) {
1906       uint32_t val =
1907          ((uint32_t)fxn->params[i].num_components) |
1908          ((uint32_t)fxn->params[i].bit_size) << 8;
1909       blob_write_uint32(ctx->blob, val);
1910    }
1911 
1912    /* At first glance, it looks like we should write the function_impl here.
1913     * However, call instructions need to be able to reference at least the
1914     * function and those will get processed as we write the function_impls.
1915     * We stop here and write function_impls as a second pass.
1916     */
1917 }
1918 
1919 static void
read_function(read_ctx * ctx)1920 read_function(read_ctx *ctx)
1921 {
1922    uint32_t flags = blob_read_uint32(ctx->blob);
1923    bool has_name = flags & 0x4;
1924    char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1925 
1926    nir_function *fxn = nir_function_create(ctx->nir, name);
1927 
1928    read_add_object(ctx, fxn);
1929 
1930    fxn->num_params = blob_read_uint32(ctx->blob);
1931    fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1932    for (unsigned i = 0; i < fxn->num_params; i++) {
1933       uint32_t val = blob_read_uint32(ctx->blob);
1934       fxn->params[i].num_components = val & 0xff;
1935       fxn->params[i].bit_size = (val >> 8) & 0xff;
1936    }
1937 
1938    fxn->is_entrypoint = flags & 0x1;
1939    fxn->is_preamble = flags & 0x2;
1940    if (flags & 0x8)
1941       fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1942    fxn->should_inline = flags & 0x10;
1943    fxn->dont_inline = flags & 0x20;
1944 }
1945 
1946 static void
write_xfb_info(write_ctx * ctx,const nir_xfb_info * xfb)1947 write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
1948 {
1949    if (xfb == NULL) {
1950       blob_write_uint32(ctx->blob, 0);
1951    } else {
1952       size_t size = nir_xfb_info_size(xfb->output_count);
1953       assert(size <= UINT32_MAX);
1954       blob_write_uint32(ctx->blob, size);
1955       blob_write_bytes(ctx->blob, xfb, size);
1956    }
1957 }
1958 
1959 static nir_xfb_info *
read_xfb_info(read_ctx * ctx)1960 read_xfb_info(read_ctx *ctx)
1961 {
1962    uint32_t size = blob_read_uint32(ctx->blob);
1963    if (size == 0)
1964       return NULL;
1965 
1966    struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
1967    blob_copy_bytes(ctx->blob, (void *)xfb, size);
1968 
1969    return xfb;
1970 }
1971 
1972 /**
1973  * Serialize NIR into a binary blob.
1974  *
1975  * \param strip  Don't serialize information only useful for debugging,
1976  *               such as variable names, making cache hits from similar
1977  *               shaders more likely.
1978  */
1979 void
nir_serialize(struct blob * blob,const nir_shader * nir,bool strip)1980 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1981 {
1982    write_ctx ctx = { 0 };
1983    ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1984    ctx.blob = blob;
1985    ctx.nir = nir;
1986    ctx.strip = strip;
1987    util_dynarray_init(&ctx.phi_fixups, NULL);
1988 
1989    size_t idx_size_offset = blob_reserve_uint32(blob);
1990 
1991    struct shader_info info = nir->info;
1992    uint32_t strings = 0;
1993    if (!strip && info.name)
1994       strings |= 0x1;
1995    if (!strip && info.label)
1996       strings |= 0x2;
1997    blob_write_uint32(blob, strings);
1998    if (!strip && info.name)
1999       blob_write_string(blob, info.name);
2000    if (!strip && info.label)
2001       blob_write_string(blob, info.label);
2002    info.name = info.label = NULL;
2003    blob_write_bytes(blob, (uint8_t *)&info, sizeof(info));
2004 
2005    write_var_list(&ctx, &nir->variables);
2006 
2007    blob_write_uint32(blob, nir->num_inputs);
2008    blob_write_uint32(blob, nir->num_uniforms);
2009    blob_write_uint32(blob, nir->num_outputs);
2010    blob_write_uint32(blob, nir->scratch_size);
2011 
2012    blob_write_uint32(blob, exec_list_length(&nir->functions));
2013    nir_foreach_function(fxn, nir) {
2014       write_function(&ctx, fxn);
2015    }
2016 
2017    nir_foreach_function_impl(impl, nir) {
2018       write_function_impl(&ctx, impl);
2019    }
2020 
2021    blob_write_uint32(blob, nir->constant_data_size);
2022    if (nir->constant_data_size > 0)
2023       blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2024 
2025    write_xfb_info(&ctx, nir->xfb_info);
2026 
2027    if (nir->info.uses_printf)
2028       nir_serialize_printf_info(blob, nir->printf_info, nir->printf_info_count);
2029 
2030    blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);
2031 
2032    _mesa_hash_table_destroy(ctx.remap_table, NULL);
2033    util_dynarray_fini(&ctx.phi_fixups);
2034 }
2035 
2036 nir_shader *
nir_deserialize(void * mem_ctx,const struct nir_shader_compiler_options * options,struct blob_reader * blob)2037 nir_deserialize(void *mem_ctx,
2038                 const struct nir_shader_compiler_options *options,
2039                 struct blob_reader *blob)
2040 {
2041    read_ctx ctx = { 0 };
2042    ctx.blob = blob;
2043    list_inithead(&ctx.phi_srcs);
2044    ctx.idx_table_len = blob_read_uint32(blob);
2045    ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2046 
2047    uint32_t strings = blob_read_uint32(blob);
2048    char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2049    char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2050 
2051    struct shader_info info;
2052    blob_copy_bytes(blob, (uint8_t *)&info, sizeof(info));
2053 
2054    ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2055 
2056    info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2057    info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2058 
2059    ctx.nir->info = info;
2060 
2061    read_var_list(&ctx, &ctx.nir->variables);
2062 
2063    ctx.nir->num_inputs = blob_read_uint32(blob);
2064    ctx.nir->num_uniforms = blob_read_uint32(blob);
2065    ctx.nir->num_outputs = blob_read_uint32(blob);
2066    ctx.nir->scratch_size = blob_read_uint32(blob);
2067 
2068    unsigned num_functions = blob_read_uint32(blob);
2069    for (unsigned i = 0; i < num_functions; i++)
2070       read_function(&ctx);
2071 
2072    nir_foreach_function(fxn, ctx.nir) {
2073       if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2074          nir_function_set_impl(fxn, read_function_impl(&ctx));
2075    }
2076 
2077    ctx.nir->constant_data_size = blob_read_uint32(blob);
2078    if (ctx.nir->constant_data_size > 0) {
2079       ctx.nir->constant_data =
2080          ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2081       blob_copy_bytes(blob, ctx.nir->constant_data,
2082                       ctx.nir->constant_data_size);
2083    }
2084 
2085    ctx.nir->xfb_info = read_xfb_info(&ctx);
2086 
2087    if (ctx.nir->info.uses_printf) {
2088       ctx.nir->printf_info =
2089          nir_deserialize_printf_info(ctx.nir, blob,
2090                                      &ctx.nir->printf_info_count);
2091    }
2092 
2093    free(ctx.idx_table);
2094 
2095    nir_validate_shader(ctx.nir, "after deserialize");
2096 
2097    return ctx.nir;
2098 }
2099 
2100 void
nir_shader_serialize_deserialize(nir_shader * shader)2101 nir_shader_serialize_deserialize(nir_shader *shader)
2102 {
2103    const struct nir_shader_compiler_options *options = shader->options;
2104 
2105    struct blob writer;
2106    blob_init(&writer);
2107    nir_serialize(&writer, shader, false);
2108 
2109    /* Delete all of dest's ralloc children but leave dest alone */
2110    void *dead_ctx = ralloc_context(NULL);
2111    ralloc_adopt(dead_ctx, shader);
2112    ralloc_free(dead_ctx);
2113 
2114    dead_ctx = ralloc_context(NULL);
2115 
2116    struct blob_reader reader;
2117    blob_reader_init(&reader, writer.data, writer.size);
2118    nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2119 
2120    blob_finish(&writer);
2121 
2122    nir_shader_replace(shader, copy);
2123    ralloc_free(dead_ctx);
2124 }
2125 
2126 void
nir_serialize_printf_info(struct blob * blob,const u_printf_info * printf_info,unsigned printf_info_count)2127 nir_serialize_printf_info(struct blob *blob,
2128                           const u_printf_info *printf_info,
2129                           unsigned printf_info_count)
2130 {
2131    blob_write_uint32(blob, printf_info_count);
2132    for (int i = 0; i < printf_info_count; i++) {
2133       const u_printf_info *info = &printf_info[i];
2134       blob_write_uint32(blob, info->num_args);
2135       blob_write_uint32(blob, info->string_size);
2136       blob_write_bytes(blob, info->arg_sizes,
2137                        info->num_args * sizeof(info->arg_sizes[0]));
2138       /* we can't use blob_write_string, because it contains multiple NULL
2139        * terminated strings */
2140       blob_write_bytes(blob, info->strings, info->string_size);
2141    }
2142 }
2143 
2144 u_printf_info *
nir_deserialize_printf_info(void * mem_ctx,struct blob_reader * blob,unsigned * printf_info_count)2145 nir_deserialize_printf_info(void *mem_ctx,
2146                             struct blob_reader *blob,
2147                             unsigned *printf_info_count)
2148 {
2149    *printf_info_count = blob_read_uint32(blob);
2150 
2151    u_printf_info *printf_info =
2152       ralloc_array(mem_ctx, u_printf_info, *printf_info_count);
2153 
2154    for (int i = 0; i < *printf_info_count; i++) {
2155       u_printf_info *info = &printf_info[i];
2156       info->num_args = blob_read_uint32(blob);
2157       info->string_size = blob_read_uint32(blob);
2158       info->arg_sizes = ralloc_array(mem_ctx, unsigned, info->num_args);
2159       blob_copy_bytes(blob, info->arg_sizes,
2160                       info->num_args * sizeof(info->arg_sizes[0]));
2161       info->strings = ralloc_array(mem_ctx, char, info->string_size);
2162       blob_copy_bytes(blob, info->strings, info->string_size);
2163    }
2164 
2165    return printf_info;
2166 }
2167