/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}

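/* num_components is packed into 3 bits: values 0-4 are stored verbatim,
 * 5 means 8 components, 6 means 16, and 7 means the real count follows the
 * header as a separate uint32 (NUM_COMPONENTS_IS_SEPARATE_7).
 */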
#define NUM_COMPONENTS_IS_SEPARATE_7 7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      write_lookup_object(ctx, var->pointer_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

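/* A source is serialized as one 32-bit word: the low 22 bits form a generic
 * header (SSA flag, indirect flag, object index) and the high 10 bits carry
 * per-instruction footer data such as ALU modifiers/swizzles or the texture
 * source type.
 */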
union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1; /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1; /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
      uint8_t _pad:1;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit, /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};

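/* Every instruction is serialized starting with a 32-bit header. The layout
 * depends on the instruction type, but instr_type always occupies the low
 * 4 bits, layouts with a destination embed the packed_dest byte in their
 * 'dest' field, and the remaining bits are type-specific.
 */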
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:14; /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:1; /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If so, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset,
                sizeof(last_header));

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            memcpy(ctx->blob->data + ctx->last_alu_header_offset,
                   &last_header, sizeof(last_header));

            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

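/* ALU sources get a compact encoding when they are all SSA values without
 * abs/negate modifiers, their swizzles are trivial (the x swizzles of the
 * first two sources can be stashed in writemask_or_two_swizzles), and all
 * object IDs still fit in 16 bits. write_alu then stores each source as a
 * single 16-bit index instead of a full packed_src word.
 */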
static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->modes < (1 << 14));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = deref->modes;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = header.deref.modes;
   } else {
      assert(deref->parent.is_ssa);
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into the 9 header bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics have num_components same as the dst or src that
    * has 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_9bit_all_combined: {
         unsigned bit_size = 9 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned is_sparse:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned array_is_lowered_cube:1;
      unsigned unused:6; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.is_sparse = tex->is_sparse,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->is_sparse = packed.u.is_sparse;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

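/* Second pass over the phi fixups recorded by write_phi: by now every SSA
 * def and block has been assigned an index, so patch the two reserved
 * uint32s of each phi source with the final object indices.
 */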
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* These aren't handled because they require special block linking */
   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);

   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   /* These aren't handled because they require special block linking */
   assert(header.jump.type != nir_jump_goto &&
          header.jump.type != nir_jump_goto_if);

   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

1718 /* Return the number of instructions read. */
1719 static unsigned
read_instr(read_ctx * ctx,nir_block * block)1720 read_instr(read_ctx *ctx, nir_block *block)
1721 {
1722 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1723 union packed_instr header;
1724 header.u32 = blob_read_uint32(ctx->blob);
1725 nir_instr *instr;
1726
1727 switch (header.any.instr_type) {
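   /* When consecutive ALU instructions shared a header on the write side,
    * only one header was emitted; num_followup_alu_sharing_header says how
    * many following ALU instructions reuse it, so read them all here.
    */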
1728 case nir_instr_type_alu:
1729 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1730 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1731 return header.alu.num_followup_alu_sharing_header + 1;
1732 case nir_instr_type_deref:
1733 instr = &read_deref(ctx, header)->instr;
1734 break;
1735 case nir_instr_type_intrinsic:
1736 instr = &read_intrinsic(ctx, header)->instr;
1737 break;
1738 case nir_instr_type_load_const:
1739 instr = &read_load_const(ctx, header)->instr;
1740 break;
1741 case nir_instr_type_ssa_undef:
1742 instr = &read_ssa_undef(ctx, header)->instr;
1743 break;
1744 case nir_instr_type_tex:
1745 instr = &read_tex(ctx, header)->instr;
1746 break;
1747 case nir_instr_type_phi:
1748 /* Phi instructions are a special case when reading because we don't
1749 * want the insertion to automatically handle use/defs for us. Instead,
1750 * we need to wait until all the blocks/instructions are read so that we
1751 * can set their sources up.
1752 */
1753 read_phi(ctx, block, header);
1754 return 1;
1755 case nir_instr_type_jump:
1756 instr = &read_jump(ctx, header)->instr;
1757 break;
1758 case nir_instr_type_call:
1759 instr = &read_call(ctx)->instr;
1760 break;
1761 case nir_instr_type_parallel_copy:
1762 unreachable("Cannot read parallel copies");
1763 default:
1764 unreachable("bad instr type");
1765 }
1766
1767 nir_instr_insert_after_block(block, instr);
1768 return 1;
1769 }
1770
1771 static void
1772 write_block(write_ctx *ctx, const nir_block *block)
1773 {
1774 write_add_object(ctx, block);
1775 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1776
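   /* Reset the ALU-header-sharing state so a header emitted in a previous
    * block is never reused across a block boundary.
    */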
1777 ctx->last_instr_type = ~0;
1778 ctx->last_alu_header_offset = 0;
1779
1780 nir_foreach_instr(instr, block) {
1781 write_instr(ctx, instr);
1782 ctx->last_instr_type = instr->type;
1783 }
1784 }
1785
1786 static void
1787 read_block(read_ctx *ctx, struct exec_list *cf_list)
1788 {
1789 /* Don't actually create a new block. Just use the one from the tail of
1790 * the list. NIR guarantees that the tail of the list is a block and that
1791 * no two blocks are side-by-side in the IR, so it should still be empty.
1792 */
1793 nir_block *block =
1794 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1795
1796 read_add_object(ctx, block);
1797 unsigned num_instrs = blob_read_uint32(ctx->blob);
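   /* read_instr() may consume more than one instruction at a time (a run of
    * ALU instructions sharing one header), so let it advance the counter.
    */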
1798 for (unsigned i = 0; i < num_instrs;) {
1799 i += read_instr(ctx, block);
1800 }
1801 }
1802
1803 static void
1804 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1805
1806 static void
1807 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1808
1809 static void
1810 write_if(write_ctx *ctx, nir_if *nif)
1811 {
1812 write_src(ctx, &nif->condition);
1813 blob_write_uint8(ctx->blob, nif->control);
1814
1815 write_cf_list(ctx, &nif->then_list);
1816 write_cf_list(ctx, &nif->else_list);
1817 }
1818
1819 static void
1820 read_if(read_ctx *ctx, struct exec_list *cf_list)
1821 {
1822 nir_if *nif = nir_if_create(ctx->nir);
1823
1824 read_src(ctx, &nif->condition, nif);
1825 nif->control = blob_read_uint8(ctx->blob);
1826
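   /* Link the new if into the enclosing CF list, then read its then/else
    * lists in the same order they were written.
    */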
1827 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1828
1829 read_cf_list(ctx, &nif->then_list);
1830 read_cf_list(ctx, &nif->else_list);
1831 }
1832
1833 static void
1834 write_loop(write_ctx *ctx, nir_loop *loop)
1835 {
1836 blob_write_uint8(ctx->blob, loop->control);
1837 write_cf_list(ctx, &loop->body);
1838 }
1839
1840 static void
1841 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1842 {
1843 nir_loop *loop = nir_loop_create(ctx->nir);
1844
1845 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1846
1847 loop->control = blob_read_uint8(ctx->blob);
1848 read_cf_list(ctx, &loop->body);
1849 }
1850
1851 static void
1852 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1853 {
1854 blob_write_uint32(ctx->blob, cf->type);
1855
1856 switch (cf->type) {
1857 case nir_cf_node_block:
1858 write_block(ctx, nir_cf_node_as_block(cf));
1859 break;
1860 case nir_cf_node_if:
1861 write_if(ctx, nir_cf_node_as_if(cf));
1862 break;
1863 case nir_cf_node_loop:
1864 write_loop(ctx, nir_cf_node_as_loop(cf));
1865 break;
1866 default:
1867 unreachable("bad cf type");
1868 }
1869 }
1870
1871 static void
1872 read_cf_node(read_ctx *ctx, struct exec_list *list)
1873 {
1874 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1875
1876 switch (type) {
1877 case nir_cf_node_block:
1878 read_block(ctx, list);
1879 break;
1880 case nir_cf_node_if:
1881 read_if(ctx, list);
1882 break;
1883 case nir_cf_node_loop:
1884 read_loop(ctx, list);
1885 break;
1886 default:
1887 unreachable("bad cf type");
1888 }
1889 }
1890
1891 static void
1892 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1893 {
1894 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1895 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1896 write_cf_node(ctx, cf);
1897 }
1898 }
1899
1900 static void
1901 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1902 {
1903 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1904 for (unsigned i = 0; i < num_cf_nodes; i++)
1905 read_cf_node(ctx, cf_list);
1906 }
1907
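/* A function_impl is serialized as: structured flag, local variables,
 * registers, reg_alloc, the body CF list, and finally the phi fixups.
 * read_function_impl() must consume exactly the same fields in the same
 * order.
 */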
1908 static void
1909 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1910 {
1911 blob_write_uint8(ctx->blob, fi->structured);
1912
1913 write_var_list(ctx, &fi->locals);
1914 write_reg_list(ctx, &fi->registers);
1915 blob_write_uint32(ctx->blob, fi->reg_alloc);
1916
1917 write_cf_list(ctx, &fi->body);
1918 write_fixup_phis(ctx);
1919 }
1920
1921 static nir_function_impl *
1922 read_function_impl(read_ctx *ctx, nir_function *fxn)
1923 {
1924 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1925 fi->function = fxn;
1926
1927 fi->structured = blob_read_uint8(ctx->blob);
1928
1929 read_var_list(ctx, &fi->locals);
1930 read_reg_list(ctx, &fi->registers);
1931 fi->reg_alloc = blob_read_uint32(ctx->blob);
1932
1933 read_cf_list(ctx, &fi->body);
1934 read_fixup_phis(ctx);
1935
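   /* Metadata (block indices, dominance, etc.) is never serialized, so none
    * of it is valid for the freshly read impl.
    */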
1936 fi->valid_metadata = 0;
1937
1938 return fi;
1939 }
1940
1941 static void
1942 write_function(write_ctx *ctx, const nir_function *fxn)
1943 {
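   /* Flags bit layout: bit 0 = is_entrypoint, bit 1 = has a name,
    * bit 2 = has a function_impl.
    */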
1944 uint32_t flags = fxn->is_entrypoint;
1945 if (fxn->name)
1946 flags |= 0x2;
1947 if (fxn->impl)
1948 flags |= 0x4;
1949 blob_write_uint32(ctx->blob, flags);
1950 if (fxn->name)
1951 blob_write_string(ctx->blob, fxn->name);
1952
1953 write_add_object(ctx, fxn);
1954
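   /* Each parameter is packed into one uint32: num_components in bits 0-7
    * and bit_size in bits 8-15.
    */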
1955 blob_write_uint32(ctx->blob, fxn->num_params);
1956 for (unsigned i = 0; i < fxn->num_params; i++) {
1957 uint32_t val =
1958 ((uint32_t)fxn->params[i].num_components) |
1959 ((uint32_t)fxn->params[i].bit_size) << 8;
1960 blob_write_uint32(ctx->blob, val);
1961 }
1962
1963 /* At first glance, it looks like we should write the function_impl here.
1964 * However, call instructions inside any function_impl need to be able to
1965 * reference any function by index, so every function must be registered
1966 * first. We stop here and write the function_impls as a second pass.
1967 */
1968 }
1969
1970 static void
1971 read_function(read_ctx *ctx)
1972 {
1973 uint32_t flags = blob_read_uint32(ctx->blob);
1974 bool has_name = flags & 0x2;
1975 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1976
1977 nir_function *fxn = nir_function_create(ctx->nir, name);
1978
1979 read_add_object(ctx, fxn);
1980
1981 fxn->num_params = blob_read_uint32(ctx->blob);
1982 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1983 for (unsigned i = 0; i < fxn->num_params; i++) {
1984 uint32_t val = blob_read_uint32(ctx->blob);
1985 fxn->params[i].num_components = val & 0xff;
1986 fxn->params[i].bit_size = (val >> 8) & 0xff;
1987 }
1988
1989 fxn->is_entrypoint = flags & 0x1;
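   /* The impl is not read here; mark the function with a sentinel so that
    * nir_deserialize() knows to read a function_impl for it in the second
    * pass.
    */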
1990 if (flags & 0x4)
1991 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1992 }
1993
1994 /**
1995 * Serialize NIR into a binary blob.
1996 *
1997 * \param strip Don't serialize information only useful for debugging,
1998 * such as variable names, making cache hits from similar
1999 * shaders more likely.
2000 */
2001 void
2002 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2003 {
2004 write_ctx ctx = {0};
2005 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2006 ctx.blob = blob;
2007 ctx.nir = nir;
2008 ctx.strip = strip;
2009 util_dynarray_init(&ctx.phi_fixups, NULL);
2010
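   /* Reserve space for the total number of remapped objects; the value is
    * patched in at the end of serialization so the reader can size its
    * index -> pointer table up front.
    */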
2011 size_t idx_size_offset = blob_reserve_uint32(blob);
2012
2013 struct shader_info info = nir->info;
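   /* Name and label strings are stored separately (bit 0 = name,
    * bit 1 = label) and omitted entirely when stripping.
    */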
2014 uint32_t strings = 0;
2015 if (!strip && info.name)
2016 strings |= 0x1;
2017 if (!strip && info.label)
2018 strings |= 0x2;
2019 blob_write_uint32(blob, strings);
2020 if (!strip && info.name)
2021 blob_write_string(blob, info.name);
2022 if (!strip && info.label)
2023 blob_write_string(blob, info.label);
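   /* The name/label pointers are host addresses with no meaning inside the
    * blob; clear them before copying shader_info verbatim.
    */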
2024 info.name = info.label = NULL;
2025 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2026
2027 write_var_list(&ctx, &nir->variables);
2028
2029 blob_write_uint32(blob, nir->num_inputs);
2030 blob_write_uint32(blob, nir->num_uniforms);
2031 blob_write_uint32(blob, nir->num_outputs);
2032 blob_write_uint32(blob, nir->scratch_size);
2033
2034 blob_write_uint32(blob, exec_list_length(&nir->functions));
2035 nir_foreach_function(fxn, nir) {
2036 write_function(&ctx, fxn);
2037 }
2038
2039 nir_foreach_function(fxn, nir) {
2040 if (fxn->impl)
2041 write_function_impl(&ctx, fxn->impl);
2042 }
2043
2044 blob_write_uint32(blob, nir->constant_data_size);
2045 if (nir->constant_data_size > 0)
2046 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2047
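   /* Patch the object count reserved at the start of the blob. */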
2048 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
2049
2050 _mesa_hash_table_destroy(ctx.remap_table, NULL);
2051 util_dynarray_fini(&ctx.phi_fixups);
2052 }
2053
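/* A minimal round-trip sketch of how these entry points are typically used,
 * e.g. by a driver shader cache. The cache bookkeeping and the cached_data /
 * cached_size / options / nir names are hypothetical; only the blob and NIR
 * calls themselves are real:
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *    // store blob.data / blob.size in the cache ...
 *    blob_finish(&blob);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, cached_data, cached_size);
 *    nir_shader *copy = nir_deserialize(NULL, options, &reader);
 */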
2054 nir_shader *
2055 nir_deserialize(void *mem_ctx,
2056 const struct nir_shader_compiler_options *options,
2057 struct blob_reader *blob)
2058 {
2059 read_ctx ctx = {0};
2060 ctx.blob = blob;
2061 list_inithead(&ctx.phi_srcs);
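   /* The first uint32 in the blob is the object count written by
    * nir_serialize(); use it to allocate the index -> pointer table.
    */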
2062 ctx.idx_table_len = blob_read_uint32(blob);
2063 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2064
2065 uint32_t strings = blob_read_uint32(blob);
2066 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2067 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2068
2069 struct shader_info info;
2070 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2071
2072 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2073
2074 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2075 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2076
2077 ctx.nir->info = info;
2078
2079 read_var_list(&ctx, &ctx.nir->variables);
2080
2081 ctx.nir->num_inputs = blob_read_uint32(blob);
2082 ctx.nir->num_uniforms = blob_read_uint32(blob);
2083 ctx.nir->num_outputs = blob_read_uint32(blob);
2084 ctx.nir->scratch_size = blob_read_uint32(blob);
2085
2086 unsigned num_functions = blob_read_uint32(blob);
2087 for (unsigned i = 0; i < num_functions; i++)
2088 read_function(&ctx);
2089
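   /* Second pass: read a function_impl for every function that was flagged
    * with NIR_SERIALIZE_FUNC_HAS_IMPL by read_function().
    */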
2090 nir_foreach_function(fxn, ctx.nir) {
2091 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2092 fxn->impl = read_function_impl(&ctx, fxn);
2093 }
2094
2095 ctx.nir->constant_data_size = blob_read_uint32(blob);
2096 if (ctx.nir->constant_data_size > 0) {
2097 ctx.nir->constant_data =
2098 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2099 blob_copy_bytes(blob, ctx.nir->constant_data,
2100 ctx.nir->constant_data_size);
2101 }
2102
2103 free(ctx.idx_table);
2104
2105 nir_validate_shader(ctx.nir, "after deserialize");
2106
2107 return ctx.nir;
2108 }
2109
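/* Round-trip a shader through the serializer in place: the shader's contents
 * are replaced with a freshly deserialized copy of itself.
 */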
2110 void
2111 nir_shader_serialize_deserialize(nir_shader *shader)
2112 {
2113 const struct nir_shader_compiler_options *options = shader->options;
2114
2115 struct blob writer;
2116 blob_init(&writer);
2117 nir_serialize(&writer, shader, false);
2118
2119 /* Delete all of the shader's ralloc children but leave the shader itself alone. */
2120 void *dead_ctx = ralloc_context(NULL);
2121 ralloc_adopt(dead_ctx, shader);
2122 ralloc_free(dead_ctx);
2123
2124 dead_ctx = ralloc_context(NULL);
2125
2126 struct blob_reader reader;
2127 blob_reader_init(&reader, writer.data, writer.size);
2128 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2129
2130 blob_finish(&writer);
2131
2132 nir_shader_replace(shader, copy);
2133 ralloc_free(dead_ctx);
2134 }
2135