/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)
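
/* MAX_OBJECT_IDS matches the 20-bit object_idx field in union packed_src
 * below: any index handed out by write_add_object() is guaranteed to fit
 * in a packed source header.
 */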

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
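
/* Worked example of the 3-bit bit_size encoding:
 *
 *    bit_size:  0  1  2  4  8  16  32  64
 *    encoded:   0  1  2  3  4  5   6   7
 *
 * i.e. a non-zero bit_size is stored as log2(bit_size) + 1.
 */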

#define NUM_COMPONENTS_IS_SEPARATE_7 7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
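
/* The 3-bit num_components encoding is the identity for 0-4 components,
 * maps 8 -> 5 and 16 -> 6, and reserves 7 (NUM_COMPONENTS_IS_SEPARATE_7)
 * to mean that the real count follows as its own uint32 in the blob.
 */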

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      write_lookup_object(ctx, var->pointer_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
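
/* Every packed source shares the 22-bit header above (is_ssa, is_indirect,
 * object_idx); the 10 footer bits belong to the instruction that owns the
 * source: ALU sources keep negate/abs plus four 2-bit swizzle components
 * there, and texture sources keep nir_tex_src_type.
 */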

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, the header stores whether the source is an SSA
    * value or a register and whether the register access is indirect in the
    * low two bits, and the remapped object index in the next 20 bits. The
    * index always fits, since write_add_object() asserts that it stays below
    * MAX_OBJECT_IDS.
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
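
/* Worked example of const_indices_9bit_all_combined: load_interpolated_input
 * with const indices 7 and 3 has num_indices = 2, so each index gets
 * 9 / 2 = 4 bits and packed_const_indices ends up as (3 << 4) | 7 = 0x37.
 */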

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
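
/* Example: the common 32-bit constant 1.0f (0x3f800000) has its low 13 bits
 * clear, so it is encoded as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13 and no extra dwords.
 */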

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:14;               /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:1;                 /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2;       /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
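
/* Invariants of the layouts above: instr_type always occupies the low 4 bits,
 * and for every instruction that has a destination the packed_dest byte
 * occupies the top 8 bits, which is what lets write_dest() fill it in without
 * knowing the instruction-specific layout.
 */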

/* The low 24 bits of the header are filled in by the caller; write_dest()
 * packs the destination into the top 8 bits and writes out the resulting
 * uint32.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it does, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
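   /* For example, a vec4 fadd scalarized into four fadds produces four
    * identical headers; only the first is written to the blob, and its
    * num_followup_alu_sharing_header field counts the other three.
    */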
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset,
                sizeof(last_header));

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            memcpy(ctx->blob->data + ctx->last_alu_header_offset,
                   &last_header, sizeof(last_header));

            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->modes < (1 << 14));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = deref->modes;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = header.deref.modes;
   } else {
      assert(deref->parent.is_ssa);
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into 9 bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics have num_components equal to that of the dest or
    * src which is declared with 0 components in the intrinsic info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_9bit_all_combined: {
         unsigned bit_size = 9 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned unused:8; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* These aren't handled because they require special block linking */
   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);

   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   /* These aren't handled because they require special block linking */
   assert(header.jump.type != nir_jump_goto &&
          header.jump.type != nir_jump_goto_if);

   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}
1687
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

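/* Every case above except the call lets its helper pack its own 32-bit
 * packed_instr header as the first dword (write_jump() writes nothing but
 * that header).  Calls have no extra fields worth packing, so the bare
 * instruction type is written here before write_call() appends the callee
 * index and the parameters.
 */
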
/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a special case when reading because we don't
       * want the insertion to set up use/def information for us
       * automatically.  Instead, we wait until all the blocks and
       * instructions have been read so that we can set up their sources
       * afterwards.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

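/* The ALU case is the one place where a single header word stands for
 * several instructions: when consecutive ALU instructions have identical
 * headers (typical after scalarization), the writer bumps
 * num_followup_alu_sharing_header in the already-emitted header (see the
 * write context's last_alu_header_offset) instead of writing a new one, and
 * the reader expands it back out above.  As an illustrative example, a vec4
 * fadd scalarized into four fadds can decode from one shared header followed
 * by four per-instruction payloads.
 */
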
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block.  Just use the one from the tail of
    * the list.  NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; it should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

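/* The read loop above advances by read_instr()'s return value rather than by
 * one, because a shared ALU header may expand into several instructions
 * while num_instrs counts instructions, not headers.
 */
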
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);

   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   fi->structured = blob_read_uint8(ctx->blob);

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

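   /* Metadata (dominance, block indices, liveness, ...) is never serialized,
    * so clear valid_metadata to force later passes to recompute it.
    */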
   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
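   /* Flag layout, mirrored by read_function():
    *   bit 0: is_entrypoint
    *   bit 1: the function has a name string
    *   bit 2: the function has an impl
    */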
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference the function,
    * and calls get processed as we write the function_impls.  So we stop
    * here and write the function_impls in a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
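   /* See write_function() for the flag layout. */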
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->shared_size);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

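   /* Back-patch the object count into the uint32 reserved at the start of
    * the blob, now that every object has been assigned an index.
    */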
   blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

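/* A minimal caller sketch for the write side; the cache identifiers are
 * hypothetical and only for illustration.  Passing strip=true drops names so
 * that similar shaders serialize to identical bytes:
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *    if (!blob.out_of_memory)
 *       disk_cache_put(cache, key, blob.data, blob.size, NULL);
 *    blob_finish(&blob);
 */
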
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->shared_size = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}

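/* The matching read side (again a sketch; buffer, size, and options come
 * from the caller):
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, buffer, size);
 *    nir_shader *nir = nir_deserialize(NULL, options, &reader);
 *
 * With a NULL mem_ctx the new shader is a standalone ralloc context that the
 * caller eventually releases with ralloc_free().
 */
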
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader itself
    * alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}