/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "nir_control_flow.h"
#include "nir_xfb_info.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)
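/* Capping object IDs at 1 << 20 guarantees they always fit in the 20-bit
 * object_idx field of union packed_src below; write_add_object() asserts
 * that the counter never reaches this limit.
 */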

typedef struct {
   size_t blob_offset;
   nir_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;
   uint32_t last_alu_header;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t)index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t)entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
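
/* Worked example: bit_size 32 encodes as log2(32) + 1 = 6, and decoding
 * computes 1 << (6 - 1) = 32; a bit_size of 0 round-trips as 0.
 */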

#define NUM_COMPONENTS_IS_SEPARATE_7 7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
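
/* The mapping is: 0-4 encode as themselves, 8 as 5 and 16 as 6.  Any
 * other component count encodes as the sentinel 7, and write_def() then
 * emits the real value in a separate uint32.
 */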

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   static const nir_const_value zero_vals[ARRAY_SIZE(c->values)] = { 0 };
   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->is_null_constant = memcmp(c->values, zero_vals, sizeof(c->values)) == 0;
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++) {
      c->elements[i] = read_constant(ctx, nvar);
      c->is_null_constant &= c->elements[i]->is_null_constant;
   }

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name : 1;
      unsigned has_constant_initializer : 1;
      unsigned has_pointer_initializer : 1;
      unsigned has_interface_type : 1;
      unsigned num_state_slots : 7;
      unsigned data_encoding : 2;
      unsigned type_same_as_last : 1;
      unsigned interface_type_same_as_last : 1;
      unsigned ray_query : 1;
      unsigned num_members : 16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location : 13;
      int location_frac : 3;
      int driver_location : 16;
   } u;
};
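
/* The signed field widths above match the range checks in
 * write_variable(): the location delta must satisfy |delta| < 1 << 12 to
 * fit the 13-bit signed field, and the driver_location delta must satisfy
 * |delta| < 1 << 15 to fit the 16-bit signed field.
 */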

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   struct nir_variable_data tmp = data;

   tmp.location = ctx->last_var_data.location;
   tmp.location_frac = ctx->last_var_data.location_frac;
   tmp.driver_location = ctx->last_var_data.driver_location;

   /* See if we can encode only the difference in locations from the last
    * variable.
    */
   if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
       abs((int)data.location -
           (int)ctx->last_var_data.location) < (1 << 12) &&
       abs((int)data.driver_location -
           (int)ctx->last_var_data.driver_location) < (1 << 15))
      flags.u.data_encoding = var_encode_location_diff;
   else
      flags.u.data_encoding = var_encode_full;

   flags.u.ray_query = var->data.ray_query;

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full) {
      blob_write_bytes(ctx->blob, &data, sizeof(data));
   } else {
      /* Serialize only the difference in locations from the last variable.
       */
      union packed_var_data_diff diff;

      diff.u.location = data.location - ctx->last_var_data.location;
      diff.u.location_frac = data.location_frac -
                             ctx->last_var_data.location_frac;
      diff.u.driver_location = data.driver_location -
                               ctx->last_var_data.driver_location;

      blob_write_uint32(ctx->blob, diff.u32);
   }

   ctx->last_var_data = data;

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *)var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *)&var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->data.ray_query = flags.u.ray_query;

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *)var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned _pad : 2; /* <-- Header */
      unsigned object_idx : 20;
      unsigned _footer : 10; /* <-- Footer */
   } any;
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned _pad : 2;     /* <-- Footer */
      unsigned swizzle_x : 2;
      unsigned swizzle_y : 2;
      unsigned swizzle_z : 2;
      unsigned swizzle_w : 2;
   } alu;
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned src_type : 5; /* <-- Footer */
      unsigned _pad : 5;
   } tex;
};
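
/* All three variants share the same 22-bit header (2 pad bits plus the
 * 20-bit object_idx), so write_src_full() can patch the SSA index into
 * any of them; the top 10 bits are a per-use footer holding either the
 * ALU swizzles or the texture src_type.
 */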

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   header.any.object_idx = write_lookup_object(ctx, src->ssa);
   blob_write_uint32(ctx->blob, header.u32);
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = { 0 };
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->ssa = read_lookup_object(ctx, header.any.object_idx);
   return header;
}

union packed_def {
   uint8_t u8;
   struct {
      uint8_t num_components : 3;
      uint8_t bit_size : 3;
      uint8_t divergent : 1;
      uint8_t loop_invariant : 1;
   };
};

enum intrinsic_const_indices_encoding {
   /* Use packed_const_indices to store tightly packed indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
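
/* Worked example from write_intrinsic(): with two const indices whose
 * largest value needs 3 bits, 3 * 2 <= 8 holds, so both indices are
 * packed into the 8-bit packed_const_indices field at a stride of
 * 8 / 2 = 4 bits each.
 */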

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
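
/* Worked example: the 32-bit float 1.0f is 0x3f800000, whose low 13 bits
 * are all zero, so it packs as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13.  A small integer such as -1
 * (0xffffffff) survives sign-extension from 19 bits unchanged, so it
 * packs as load_const_scalar_lo_19bits_sext instead.
 */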

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type : 4; /* always present */
      unsigned _pad : 20;
      unsigned def : 8; /* always last */
   } any;
   struct {
      unsigned instr_type : 4;
      unsigned exact : 1;
      unsigned no_signed_wrap : 1;
      unsigned no_unsigned_wrap : 1;
      unsigned padding : 1;
      /* Swizzles for 2 srcs */
      unsigned two_swizzles : 4;
      unsigned op : 9;
      unsigned packed_src_ssa_16bit : 1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header : 2;
      unsigned def : 8;
   } alu;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned cast_type_same_as_last : 1;
      unsigned modes : 6; /* See (de|en)code_deref_modes() */
      unsigned _pad : 8;
      unsigned in_bounds : 1;
      unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */
      unsigned def : 8;
   } deref;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned _pad : 1;
      unsigned object_idx : 16; /* if 0, the object ID is a separate uint32 */
      unsigned def : 8;
   } deref_var;
   struct {
      unsigned instr_type : 4;
      unsigned intrinsic : 10;
      unsigned const_indices_encoding : 2;
      unsigned packed_const_indices : 8;
      unsigned def : 8;
   } intrinsic;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned packing : 2;       /* enum load_const_packing */
      unsigned packed_value : 19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned _pad : 21;
   } undef;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 4;
      unsigned op : 5;
      unsigned _pad : 11;
      unsigned def : 8;
   } tex;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 20;
      unsigned def : 8;
   } phi;
   struct {
      unsigned instr_type : 4;
      unsigned type : 2;
      unsigned _pad : 26;
   } jump;
   struct {
      unsigned instr_type : 4;
      unsigned type : 4;
      unsigned string_length : 16;
      unsigned def : 8;
   } debug_info;
};
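
/* Variants that go through write_def() (alu, deref, intrinsic, tex, phi,
 * debug_info) all keep instr_type in the low 4 bits and the packed_def
 * byte in the top 8 bits, which is what lets write_def() store pdef.u8
 * through header.any.def regardless of variant.  load_const and undef
 * instead encode their destination inline as last_component/bit_size.
 */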

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_def(write_ctx *ctx, const nir_def *def, union packed_instr header,
          nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_def) == 1);
   union packed_def pdef;
   pdef.u8 = 0;

   pdef.num_components =
      encode_num_components_in_3bits(def->num_components);
   pdef.bit_size = encode_bit_size_3bits(def->bit_size);
   pdef.divergent = def->divergent;
   pdef.loop_invariant = def->loop_invariant;
   header.any.def = pdef.u8;

   /* Check if the current ALU instruction has the same header as the
    * previous instruction, if that was also an ALU. If so, we don't have
    * to write the current header. This is a typical occurrence after
    * scalarization.
    */
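   /* For example, a vec4 fadd that scalarization split into four scalar
    * fadds writes one header followed by three header-less instructions;
    * num_followup_alu_sharing_header counts the three followers, which is
    * why at most four consecutive ALUs can share one header.
    */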
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         last_header.u32 = ctx->last_alu_header;

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
                                  last_header.u32);
            ctx->last_alu_header = last_header.u32;
            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
         blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
         ctx->last_alu_header = header.u32;
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, def->num_components);

   write_add_object(ctx, def);
}

static void
read_def(read_ctx *ctx, nir_def *def, nir_instr *instr,
         union packed_instr header)
{
   union packed_def pdef;
   pdef.u8 = header.any.def;

   unsigned bit_size = decode_bit_size_3bits(pdef.bit_size);
   unsigned num_components;
   if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      num_components = blob_read_uint32(ctx->blob);
   else
      num_components = decode_num_components_in_3bits(pdef.num_components);
   nir_def_init(instr, def, num_components, bit_size);
   def->divergent = pdef.divergent;
   def->loop_invariant = pdef.loop_invariant;
   read_add_object(ctx, def);
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in two_swizzles for SSA ALUs.
          */
         if (i < 2 && chan == 0 && alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}
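
/* When this returns true, write_alu() emits each source as a single
 * uint16 holding just the SSA object ID instead of a 4-byte packed_src
 * (plus any extra swizzle dwords), and read_alu() reconstructs the
 * swizzles as identity, with src0.x/src1.x restored from two_swizzles.
 */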

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.two_swizzles |= alu->src[1].swizzle[0] << 2;
   }

   write_def(ctx, &alu->def, header, alu->instr.type);
   blob_write_uint32(ctx->blob, alu->fp_fast_math);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] << (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;

   read_def(ctx, &alu->def, &alu->instr, header);
   alu->fp_fast_math = blob_read_uint32(ctx->blob);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit) {
      alu->src[0].swizzle[0] = header.alu.two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.two_swizzles >> 2;
   }

   return alu;
}

#define NUM_GENERIC_MODES 4
#define MODE_ENC_GENERIC_BIT (1 << 5)

static nir_variable_mode
decode_deref_modes(unsigned modes)
{
   if (modes & MODE_ENC_GENERIC_BIT) {
      modes &= ~MODE_ENC_GENERIC_BIT;
      return modes << (ffs(nir_var_mem_generic) - 1);
   } else {
      return 1 << modes;
   }
}

static unsigned
encode_deref_modes(nir_variable_mode modes)
{
   /* Mode sets on derefs generally come in two forms. For certain OpenCL
    * cases, we can have more than one of the generic modes set. In this
    * case, we need the full bitfield. Fortunately, there are only 4 of
    * these. For all other modes, we can only have one mode at a time so we
    * can compress them by only storing the bit position. This, plus one bit
    * to select encoding, lets us pack the entire bitfield in 6 bits.
    */

   /* Assert that the modes we are compressing fit along with the generic bit
    */
   STATIC_ASSERT((nir_num_variable_modes - NUM_GENERIC_MODES) <
                 MODE_ENC_GENERIC_BIT);

   /* Assert that the generic modes are defined at the end of the modes enum
    */
   STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
                 (1 << (nir_num_variable_modes - NUM_GENERIC_MODES)));

   unsigned enc;
   if (modes == 0 || (modes & nir_var_mem_generic)) {
      assert(!(modes & ~nir_var_mem_generic));
      enc = modes >> (ffs(nir_var_mem_generic) - 1);
      assert(enc < MODE_ENC_GENERIC_BIT);
      enc |= MODE_ENC_GENERIC_BIT;
   } else {
      assert(util_is_power_of_two_nonzero(modes));
      enc = ffs(modes) - 1;
      assert(enc < MODE_ENC_GENERIC_BIT);
   }
   assert(modes == decode_deref_modes(enc));
   return enc;
}
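
/* Worked example: a single non-generic mode bit (1 << n) encodes as just
 * n with MODE_ENC_GENERIC_BIT clear, while a set of generic modes is
 * shifted down so the NUM_GENERIC_MODES generic bits occupy the low bits
 * and MODE_ENC_GENERIC_BIT is set to select that decoding path.
 */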

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = encode_deref_modes(deref->modes);
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx);

      header.deref.in_bounds = deref->arr.in_bounds;
   }

   write_def(ctx, &deref->def, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_def(ctx, &deref->def, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent);
         read_src(ctx, &deref->arr.index);
      }

      deref->arr.in_bounds = header.deref.in_bounds;

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = decode_deref_modes(header.deref.modes);
   } else {
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 10 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 1024);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 1024);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 8) {
         header.intrinsic.const_indices_encoding = const_indices_all_combined;

         /* Pack all const indices into 8 bits. */
         unsigned bit_size = 8 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_def(ctx, &intrin->def, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_def(ctx, &intrin->def, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i]);

   /* Vectorized intrinsics take their num_components from whichever dst or
    * src has 0 components in its info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = intrin->def.num_components;
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_all_combined: {
         unsigned bit_size = 8 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));
   lc->def.divergent = false;
   lc->def.loop_invariant = true;

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = header.load_const.packed_value;
         if (lc->value[0].u64 >> 18)
            lc->value[0].u64 |= UINT64_C(0xfffffffffff80000);
         break;
      case 32:
         lc->value[0].u32 = header.load_const.packed_value;
         if (lc->value[0].u32 >> 18)
            lc->value[0].u32 |= 0xfff80000;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_undef_instr *undef =
      nir_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                             decode_bit_size_3bits(header.undef.bit_size));

   undef->def.divergent = false;
   undef->def.loop_invariant = true;

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim : 4;
      unsigned dest_type : 8;
      unsigned coord_components : 3;
      unsigned is_array : 1;
      unsigned is_shadow : 1;
      unsigned is_new_style_shadow : 1;
      unsigned is_sparse : 1;
      unsigned component : 2;
      unsigned texture_non_uniform : 1;
      unsigned sampler_non_uniform : 1;
      unsigned array_is_lowered_cube : 1;
      unsigned is_gather_implicit_lod : 1;
      unsigned unused : 5; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 32);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_def(ctx, &tex->def, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   blob_write_uint32(ctx->blob, tex->backend_flags);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.is_sparse = tex->is_sparse,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
      .u.is_gather_implicit_lod = tex->is_gather_implicit_lod,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_def(ctx, &tex->def, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   tex->backend_flags = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->is_sparse = packed.u.is_sparse;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
   tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_def(ctx, &phi->def, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

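/* Phi handling is two-pass on the write side: write_phi() above reserves
 * two dwords per source and records a fixup, and write_fixup_phis() below
 * patches in the SSA-def and predecessor-block IDs once every def and
 * block in the function has been assigned an index.
 */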
1491 static void
write_fixup_phis(write_ctx * ctx)1492 write_fixup_phis(write_ctx *ctx)
1493 {
1494 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1495 blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
1496 write_lookup_object(ctx, fixup->src));
1497 blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
1498 write_lookup_object(ctx, fixup->block));
1499 }
1500
1501 util_dynarray_clear(&ctx->phi_fixups);
1502 }
1503
1504 static nir_phi_instr *
read_phi(read_ctx * ctx,nir_block * blk,union packed_instr header)1505 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1506 {
1507 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1508
1509 read_def(ctx, &phi->def, &phi->instr, header);
1510
1511 /* For similar reasons as before, we just store the index directly into the
1512 * pointer, and let a later pass resolve the phi sources.
1513 *
1514 * In order to ensure that the copied sources (which are just the indices
1515 * from the blob for now) don't get inserted into the old shader's use-def
1516 * lists, we have to add the phi instruction *before* we set up its
1517 * sources.
1518 */
1519 nir_instr_insert_after_block(blk, &phi->instr);
1520
1521 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1522 nir_def *def = (nir_def *)(uintptr_t)blob_read_uint32(ctx->blob);
1523 nir_block *pred = (nir_block *)(uintptr_t)blob_read_uint32(ctx->blob);
1524 nir_phi_src *src = nir_phi_instr_add_src(phi, pred, def);
1525
1526 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1527 * we have to set the parent_instr manually. It doesn't really matter
1528 * when we do it, so we might as well do it here.
1529 */
1530 nir_src_set_parent_instr(&src->src, &phi->instr);
1531
1532 /* Stash it in the list of phi sources. We'll walk this list and fix up
1533 * sources at the very end of read_function_impl.
1534 */
1535 list_add(&src->src.use_link, &ctx->phi_srcs);
1536 }
1537
1538 return phi;
1539 }
1540
1541 static void
read_fixup_phis(read_ctx * ctx)1542 read_fixup_phis(read_ctx *ctx)
1543 {
1544 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1545 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1546 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1547
1548 /* Remove from this list */
1549 list_del(&src->src.use_link);
1550
1551 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1552 }
1553 assert(list_is_empty(&ctx->phi_srcs));
1554 }
1555
1556 static void
write_jump(write_ctx * ctx,const nir_jump_instr * jmp)1557 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1558 {
1559 /* These aren't handled because they require special block linking */
1560 assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1561
1562 assert(jmp->type < 4);
1563
1564 union packed_instr header;
1565 header.u32 = 0;
1566
1567 header.jump.instr_type = jmp->instr.type;
1568 header.jump.type = jmp->type;
1569
1570 blob_write_uint32(ctx->blob, header.u32);
1571 }
1572
1573 static nir_jump_instr *
read_jump(read_ctx * ctx,union packed_instr header)1574 read_jump(read_ctx *ctx, union packed_instr header)
1575 {
1576 /* These aren't handled because they require special block linking */
1577 assert(header.jump.type != nir_jump_goto &&
1578 header.jump.type != nir_jump_goto_if);
1579
1580 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1581 return jmp;
1582 }
1583
1584 static void
write_call(write_ctx * ctx,const nir_call_instr * call)1585 write_call(write_ctx *ctx, const nir_call_instr *call)
1586 {
1587 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1588
1589 for (unsigned i = 0; i < call->num_params; i++)
1590 write_src(ctx, &call->params[i]);
1591 }
1592
1593 static nir_call_instr *
read_call(read_ctx * ctx)1594 read_call(read_ctx *ctx)
1595 {
1596 nir_function *callee = read_object(ctx);
1597 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1598
1599 for (unsigned i = 0; i < call->num_params; i++)
1600 read_src(ctx, &call->params[i]);
1601
1602 return call;
1603 }
1604
1605 static void
write_debug_info(write_ctx * ctx,const nir_debug_info_instr * di)1606 write_debug_info(write_ctx *ctx, const nir_debug_info_instr *di)
1607 {
1608 union packed_instr header;
1609 header.u32 = 0;
1610
1611 header.debug_info.instr_type = nir_instr_type_debug_info;
1612 header.debug_info.type = di->type;
1613 header.debug_info.string_length = di->string_length;
1614
1615 switch (di->type) {
1616 case nir_debug_info_src_loc:
1617 blob_write_uint32(ctx->blob, header.u32);
1618 blob_write_uint32(ctx->blob, di->src_loc.line);
1619 blob_write_uint32(ctx->blob, di->src_loc.column);
1620 blob_write_uint32(ctx->blob, di->src_loc.spirv_offset);
1621 blob_write_uint8(ctx->blob, di->src_loc.source);
1622 if (di->src_loc.line)
1623 write_src(ctx, &di->src_loc.filename);
1624 return;
1625 case nir_debug_info_string:
1626 write_def(ctx, &di->def, header, di->instr.type);
1627 blob_write_bytes(ctx->blob, di->string, di->string_length);
1628 return;
1629 }
1630
1631 unreachable("Unimplemented nir_debug_info_type");
1632 }
1633
1634 static nir_debug_info_instr *
read_debug_info(read_ctx * ctx,union packed_instr header)1635 read_debug_info(read_ctx *ctx, union packed_instr header)
1636 {
1637 nir_debug_info_type type = header.debug_info.type;
1638
1639 switch (type) {
1640 case nir_debug_info_src_loc: {
1641 nir_debug_info_instr *di = nir_debug_info_instr_create(ctx->nir, type, 0);
1642 di->src_loc.line = blob_read_uint32(ctx->blob);
1643 di->src_loc.column = blob_read_uint32(ctx->blob);
1644 di->src_loc.spirv_offset = blob_read_uint32(ctx->blob);
1645 di->src_loc.source = blob_read_uint8(ctx->blob);
1646 if (di->src_loc.line)
1647 read_src(ctx, &di->src_loc.filename);
1648 return di;
1649 }
1650 case nir_debug_info_string: {
1651 nir_debug_info_instr *di =
1652 nir_debug_info_instr_create(ctx->nir, type, header.debug_info.string_length);
1653 read_def(ctx, &di->def, &di->instr, header);
1654 memcpy(di->string, blob_read_bytes(ctx->blob, di->string_length), di->string_length);
1655 return di;
1656 }
1657 }
1658
1659 unreachable("Unimplemented nir_debug_info_type");
1660 }
1661
1662 static void
write_instr(write_ctx * ctx,const nir_instr * instr)1663 write_instr(write_ctx *ctx, const nir_instr *instr)
1664 {
1665 /* We have only 4 bits for the instruction type. */
1666 assert(instr->type < 16);
1667
1668 switch (instr->type) {
1669 case nir_instr_type_alu:
1670 write_alu(ctx, nir_instr_as_alu(instr));
1671 break;
1672 case nir_instr_type_deref:
1673 write_deref(ctx, nir_instr_as_deref(instr));
1674 break;
1675 case nir_instr_type_intrinsic:
1676 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1677 break;
1678 case nir_instr_type_load_const:
1679 write_load_const(ctx, nir_instr_as_load_const(instr));
1680 break;
1681 case nir_instr_type_undef:
1682 write_ssa_undef(ctx, nir_instr_as_undef(instr));
1683 break;
1684 case nir_instr_type_tex:
1685 write_tex(ctx, nir_instr_as_tex(instr));
1686 break;
1687 case nir_instr_type_phi:
1688 write_phi(ctx, nir_instr_as_phi(instr));
1689 break;
1690 case nir_instr_type_jump:
1691 write_jump(ctx, nir_instr_as_jump(instr));
1692 break;
1693 case nir_instr_type_call:
1694 blob_write_uint32(ctx->blob, instr->type);
1695 write_call(ctx, nir_instr_as_call(instr));
1696 break;
1697 case nir_instr_type_debug_info:
1698 write_debug_info(ctx, nir_instr_as_debug_info(instr));
1699 break;
1700 case nir_instr_type_parallel_copy:
1701 unreachable("Cannot write parallel copies");
1702 default:
1703 unreachable("bad instr type");
1704 }
1705 }
1706
/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us.  Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_debug_info:
      instr = &read_debug_info(ctx, header)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

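/* A block is serialized as a divergence flag, an instruction count, and then
 * the instructions themselves.  The ALU header-sharing state is reset per
 * block so that headers are never shared across block boundaries.
 */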
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint8(ctx->blob, block->divergent);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block.  Just use the one from the tail of
    * the list.  NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; it should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   block->divergent = blob_read_uint8(ctx->blob);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

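/* Control flow is serialized by recursive descent over the CF tree: a
 * cf_list is a node count followed by its nodes, and each node is a 32-bit
 * type tag followed by a type-specific payload.  The read side mirrors this
 * exactly in read_cf_node()/read_cf_list().
 */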
static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);
   blob_write_uint8(ctx->blob, nif->control);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition);
   nif->control = blob_read_uint8(ctx->blob);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

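/* Loop payload: a control byte, two divergence flags, a has-continue-
 * construct flag, the body cf_list, and, only when the flag is set, the
 * continue cf_list.  read_loop() consumes these in the same order.
 */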
static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   blob_write_uint8(ctx->blob, loop->control);
   blob_write_uint8(ctx->blob, loop->divergent_continue);
   blob_write_uint8(ctx->blob, loop->divergent_break);
   bool has_continue_construct = nir_loop_has_continue_construct(loop);
   blob_write_uint8(ctx->blob, has_continue_construct);

   write_cf_list(ctx, &loop->body);
   if (has_continue_construct) {
      write_cf_list(ctx, &loop->continue_list);
   }
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   loop->control = blob_read_uint8(ctx->blob);
   loop->divergent_continue = blob_read_uint8(ctx->blob);
   loop->divergent_break = blob_read_uint8(ctx->blob);
   bool has_continue_construct = blob_read_uint8(ctx->blob);

   read_cf_list(ctx, &loop->body);
   if (has_continue_construct) {
      nir_loop_add_continue_construct(loop);
      read_cf_list(ctx, &loop->continue_list);
   }
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

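/* Function bodies are written after all nir_function headers (see the
 * comment at the end of write_function()), so by the time an impl is
 * serialized every function already has a remap-table index; a preamble is
 * therefore stored as its function's index.  Metadata is never serialized:
 * the reader clears valid_metadata so passes recompute it on demand.
 */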
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);
   blob_write_uint8(ctx->blob, !!fi->preamble);

   if (fi->preamble)
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));

   write_var_list(ctx, &fi->locals);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);

   fi->structured = blob_read_uint8(ctx->blob);
   bool preamble = blob_read_uint8(ctx->blob);

   if (preamble)
      fi->preamble = read_object(ctx);

   read_var_list(ctx, &fi->locals);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

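/* Function header layout: a flags word (0x1 entrypoint, 0x2 preamble,
 * 0x4 has-name, 0x8 has-impl, 0x10 should_inline, 0x20 dont_inline,
 * 0x40 subroutine, 0x80 tmp-globals wrapper, 0x100 nonzero workgroup size),
 * the optional name and workgroup size, subroutine info, then one packed
 * word per parameter: num_components in bits 0-7, bit_size in bits 8-15,
 * and a has-name flag in bit 16.
 */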
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = 0;
   if (fxn->is_entrypoint)
      flags |= 0x1;
   if (fxn->is_preamble)
      flags |= 0x2;
   if (fxn->name)
      flags |= 0x4;
   if (fxn->impl)
      flags |= 0x8;
   if (fxn->should_inline)
      flags |= 0x10;
   if (fxn->dont_inline)
      flags |= 0x20;
   if (fxn->is_subroutine)
      flags |= 0x40;
   if (fxn->is_tmp_globals_wrapper)
      flags |= 0x80;
   if (fxn->workgroup_size[0] || fxn->workgroup_size[1] || fxn->workgroup_size[2])
      flags |= 0x100;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   if (flags & 0x100) {
      blob_write_uint32(ctx->blob, fxn->workgroup_size[0]);
      blob_write_uint32(ctx->blob, fxn->workgroup_size[1]);
      blob_write_uint32(ctx->blob, fxn->workgroup_size[2]);
   }

   blob_write_uint32(ctx->blob, fxn->subroutine_index);
   blob_write_uint32(ctx->blob, fxn->num_subroutine_types);
   for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
      encode_type_to_blob(ctx->blob, fxn->subroutine_types[i]);
   }

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;

      bool has_name = fxn->params[i].name && !ctx->strip;
      if (has_name)
         val |= 0x10000;

      blob_write_uint32(ctx->blob, val);
      if (has_name)
         blob_write_string(ctx->blob, fxn->params[i].name);

      encode_type_to_blob(ctx->blob, fxn->params[i].type);
      blob_write_uint32(ctx->blob, encode_deref_modes(fxn->params[i].mode));
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);

   bool has_name = flags & 0x4;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   if (flags & 0x100) {
      fxn->workgroup_size[0] = blob_read_uint32(ctx->blob);
      fxn->workgroup_size[1] = blob_read_uint32(ctx->blob);
      fxn->workgroup_size[2] = blob_read_uint32(ctx->blob);
   }

   fxn->subroutine_index = blob_read_uint32(ctx->blob);
   fxn->num_subroutine_types = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
      fxn->subroutine_types[i] = decode_type_from_blob(ctx->blob);
   }

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = rzalloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      bool has_name = (val & 0x10000);
      if (has_name) {
         /* Duplicate into the function's ralloc context; the blob's storage
          * does not outlive deserialization.
          */
         fxn->params[i].name = ralloc_strdup(fxn, blob_read_string(ctx->blob));
      }

      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
      fxn->params[i].type = decode_type_from_blob(ctx->blob);
      fxn->params[i].mode = decode_deref_modes(blob_read_uint32(ctx->blob));
   }

   fxn->is_entrypoint = flags & 0x1;
   fxn->is_preamble = flags & 0x2;
   if (flags & 0x8)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
   fxn->should_inline = flags & 0x10;
   fxn->dont_inline = flags & 0x20;
   fxn->is_subroutine = flags & 0x40;
   fxn->is_tmp_globals_wrapper = flags & 0x80;
}

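/* Transform-feedback info is stored as a size prefix plus a raw byte copy,
 * which relies on nir_xfb_info being a flat, pointer-free structure; a size
 * of 0 means the shader has no xfb_info.
 */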
static void
write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
{
   if (xfb == NULL) {
      blob_write_uint32(ctx->blob, 0);
   } else {
      size_t size = nir_xfb_info_size(xfb->output_count);
      assert(size <= UINT32_MAX);
      blob_write_uint32(ctx->blob, size);
      blob_write_bytes(ctx->blob, xfb, size);
   }
}

static nir_xfb_info *
read_xfb_info(read_ctx *ctx)
{
   uint32_t size = blob_read_uint32(ctx->blob);
   if (size == 0)
      return NULL;

   struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
   blob_copy_bytes(ctx->blob, (void *)xfb, size);

   return xfb;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = { 0 };
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *)&info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function_impl(impl, nir) {
      write_function_impl(&ctx, impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   write_xfb_info(&ctx, nir->xfb_info);

   if (nir->info.uses_printf)
      u_printf_serialize_info(blob, nir->printf_info, nir->printf_info_count);

   blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

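/* Deserialization mirrors nir_serialize(): index-table size, name/label
 * strings, shader_info, variables, counts, function headers, function impls,
 * constant data, xfb info, and printf info, in that order.  Functions whose
 * impl flag was set hold the NIR_SERIALIZE_FUNC_HAS_IMPL sentinel after
 * read_function() and receive their real impl in the second loop below.
 */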
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = { 0 };
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *)&info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         nir_function_set_impl(fxn, read_function_impl(&ctx));
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   ctx.nir->xfb_info = read_xfb_info(&ctx);

   if (ctx.nir->info.uses_printf) {
      ctx.nir->printf_info =
         u_printf_deserialize_info(ctx.nir, blob,
                                   &ctx.nir->printf_info_count);
   }

   free(ctx.idx_table);

   nir_validate_shader(ctx.nir, "after deserialize");

   return ctx.nir;
}

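/* Round-trip a shader through serialize/deserialize in place.  Useful for
 * exercising the serializer in testing, so bugs surface immediately rather
 * than only on a shader-cache hit.
 */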
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}