1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
/* Calling-convention IDs passed to LLVM for the AMDGPU backend.
 * NOTE(review): these numeric values appear to mirror llvm::CallingConv's
 * amdgpu_* entries — they must stay in sync with the LLVM version in use;
 * verify against llvm/IR/CallingConv.h when bumping LLVM.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
51
/**
 * Attach the target-dependent string attribute "name"="value" to function
 * \p F, with the integer value formatted as a decimal string.
 */
void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
{
	char value_str[16];

	snprintf(value_str, sizeof(value_str), "%d", value);
	LLVMAddTargetDependentFunctionAttr(F, name, value_str);
}
59
/* Context handed to the LLVM diagnostic handler: routes messages to the
 * pipe debug callback and records whether an error occurred. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval; /* 0 until an LLVMDSError diagnostic is seen, then 1 */
};
64
/**
 * LLVM diagnostic callback: forward each diagnostic to the pipe debug
 * callback and mark the compile as failed when an error is reported.
 */
static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
	char *desc = LLVMGetDiagInfoDescription(di);
	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
	const char *sevname;

	switch (severity) {
	case LLVMDSError:
		sevname = "error";
		break;
	case LLVMDSWarning:
		sevname = "warning";
		break;
	case LLVMDSRemark:
		sevname = "remark";
		break;
	case LLVMDSNote:
		sevname = "note";
		break;
	default:
		sevname = "unknown";
		break;
	}

	pipe_debug_message(diag->debug, SHADER_INFO,
			   "LLVM diagnostic (%s): %s", sevname, desc);

	if (severity == LLVMDSError) {
		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", desc);
		diag->retval = 1;
	}

	/* The description string is owned by LLVM. */
	LLVMDisposeMessage(desc);
}
99
100 /**
101 * Compile an LLVM module to machine code.
102 *
103 * @returns 0 for success, 1 for failure
104 */
si_llvm_compile(LLVMModuleRef M,struct ac_shader_binary * binary,LLVMTargetMachineRef tm,struct pipe_debug_callback * debug)105 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
106 LLVMTargetMachineRef tm,
107 struct pipe_debug_callback *debug)
108 {
109 struct si_llvm_diagnostics diag;
110 char *err;
111 LLVMContextRef llvm_ctx;
112 LLVMMemoryBufferRef out_buffer;
113 unsigned buffer_size;
114 const char *buffer_data;
115 LLVMBool mem_err;
116
117 diag.debug = debug;
118 diag.retval = 0;
119
120 /* Setup Diagnostic Handler*/
121 llvm_ctx = LLVMGetModuleContext(M);
122
123 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
124
125 /* Compile IR*/
126 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
127 &out_buffer);
128
129 /* Process Errors/Warnings */
130 if (mem_err) {
131 fprintf(stderr, "%s: %s", __FUNCTION__, err);
132 pipe_debug_message(debug, SHADER_INFO,
133 "LLVM emit error: %s", err);
134 FREE(err);
135 diag.retval = 1;
136 goto out;
137 }
138
139 /* Extract Shader Code*/
140 buffer_size = LLVMGetBufferSize(out_buffer);
141 buffer_data = LLVMGetBufferStart(out_buffer);
142
143 if (!ac_elf_read(buffer_data, buffer_size, binary)) {
144 fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
145 diag.retval = 1;
146 }
147
148 /* Clean up */
149 LLVMDisposeMemoryBuffer(out_buffer);
150
151 out:
152 if (diag.retval != 0)
153 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
154 return diag.retval;
155 }
156
tgsi2llvmtype(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type type)157 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
158 enum tgsi_opcode_type type)
159 {
160 struct si_shader_context *ctx = si_shader_context(bld_base);
161
162 switch (type) {
163 case TGSI_TYPE_UNSIGNED:
164 case TGSI_TYPE_SIGNED:
165 return ctx->ac.i32;
166 case TGSI_TYPE_UNSIGNED64:
167 case TGSI_TYPE_SIGNED64:
168 return ctx->ac.i64;
169 case TGSI_TYPE_DOUBLE:
170 return ctx->ac.f64;
171 case TGSI_TYPE_UNTYPED:
172 case TGSI_TYPE_FLOAT:
173 return ctx->ac.f32;
174 default: break;
175 }
176 return 0;
177 }
178
/**
 * Bitcast \p value to the LLVM type corresponding to \p type.
 * When the TGSI type has no LLVM equivalent, return the value unchanged.
 */
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
		     enum tgsi_opcode_type type, LLVMValueRef value)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);

	if (!dst_type)
		return value;

	return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
}
190
191 /**
192 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
193 * or an undefined value in the same interval otherwise.
194 */
si_llvm_bound_index(struct si_shader_context * ctx,LLVMValueRef index,unsigned num)195 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
196 LLVMValueRef index,
197 unsigned num)
198 {
199 LLVMBuilderRef builder = ctx->ac.builder;
200 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
201 LLVMValueRef cc;
202
203 if (util_is_power_of_two(num)) {
204 index = LLVMBuildAnd(builder, index, c_max, "");
205 } else {
206 /* In theory, this MAX pattern should result in code that is
207 * as good as the bit-wise AND above.
208 *
209 * In practice, LLVM generates worse code (at the time of
210 * writing), because its value tracking is not strong enough.
211 */
212 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
213 index = LLVMBuildSelect(builder, cc, index, c_max, "");
214 }
215
216 return index;
217 }
218
/* Reorder the four channels of \p value according to the given swizzle. */
static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
				 LLVMValueRef value,
				 unsigned swizzle_x,
				 unsigned swizzle_y,
				 unsigned swizzle_z,
				 unsigned swizzle_w)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const unsigned chan[4] = { swizzle_x, swizzle_y, swizzle_z, swizzle_w };
	LLVMValueRef mask[4];
	unsigned i;

	for (i = 0; i < 4; ++i)
		mask[i] = LLVMConstInt(ctx->i32, chan[i], 0);

	return LLVMBuildShuffleVector(ctx->ac.builder, value,
				      LLVMGetUndef(LLVMTypeOf(value)),
				      LLVMConstVector(mask, 4), "");
}
239
240 /**
241 * Return the description of the array covering the given temporary register
242 * index.
243 */
244 static unsigned
get_temp_array_id(struct lp_build_tgsi_context * bld_base,unsigned reg_index,const struct tgsi_ind_register * reg)245 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
246 unsigned reg_index,
247 const struct tgsi_ind_register *reg)
248 {
249 struct si_shader_context *ctx = si_shader_context(bld_base);
250 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
251 unsigned i;
252
253 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
254 return reg->ArrayID;
255
256 for (i = 0; i < num_arrays; i++) {
257 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
258
259 if (reg_index >= array->range.First && reg_index <= array->range.Last)
260 return i + 1;
261 }
262
263 return 0;
264 }
265
266 static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context * bld_base,unsigned File,unsigned reg_index,const struct tgsi_ind_register * reg)267 get_array_range(struct lp_build_tgsi_context *bld_base,
268 unsigned File, unsigned reg_index,
269 const struct tgsi_ind_register *reg)
270 {
271 struct si_shader_context *ctx = si_shader_context(bld_base);
272 struct tgsi_declaration_range range;
273
274 if (File == TGSI_FILE_TEMPORARY) {
275 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
276 if (array_id)
277 return ctx->temp_arrays[array_id - 1].range;
278 }
279
280 range.First = 0;
281 range.Last = bld_base->info->file_max[File];
282 return range;
283 }
284
/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct si_shader_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only temporaries can be backed by an array alloca. */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* No alloca: the array is stored as individual per-channel allocas
	 * and must use the vector insert/extract fallback. */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* A channel that is never written reads from the shared dummy
	 * alloca, which is always safe to load from. */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = si_get_indirect_index(ctx, reg_indirect, 1,
				      reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca packs only written channels: element stride is
	 * popcount(writemask), and the channel offset is the number of
	 * written channels below this swizzle. */
	index = LLVMBuildMul(
		builder, index,
		LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
		"");
	index = LLVMBuildAdd(
		builder, index,
		LLVMConstInt(ctx->i32,
			     util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
		"");
	idxs[0] = ctx->i32_0;
	idxs[1] = index;
	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}
352
353 LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,LLVMTypeRef type,LLVMValueRef ptr,LLVMValueRef ptr2)354 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
355 LLVMTypeRef type,
356 LLVMValueRef ptr,
357 LLVMValueRef ptr2)
358 {
359 struct si_shader_context *ctx = si_shader_context(bld_base);
360 LLVMValueRef result;
361
362 result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
363
364 result = LLVMBuildInsertElement(ctx->ac.builder,
365 result,
366 ac_to_integer(&ctx->ac, ptr),
367 ctx->i32_0, "");
368 result = LLVMBuildInsertElement(ctx->ac.builder,
369 result,
370 ac_to_integer(&ctx->ac, ptr2),
371 ctx->i32_1, "");
372 return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
373 }
374
375 static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context * bld_base,unsigned File,enum tgsi_opcode_type type,struct tgsi_declaration_range range,unsigned swizzle)376 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
377 unsigned File, enum tgsi_opcode_type type,
378 struct tgsi_declaration_range range,
379 unsigned swizzle)
380 {
381 struct si_shader_context *ctx = si_shader_context(bld_base);
382 unsigned i, size = range.Last - range.First + 1;
383 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
384 LLVMValueRef result = LLVMGetUndef(vec);
385
386 struct tgsi_full_src_register tmp_reg = {};
387 tmp_reg.Register.File = File;
388
389 for (i = 0; i < size; ++i) {
390 tmp_reg.Register.Index = i + range.First;
391 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
392 result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
393 LLVMConstInt(ctx->i32, i, 0), "array_vector");
394 }
395 return result;
396 }
397
/**
 * Load one channel of an indirectly-addressed register.
 *
 * Uses a direct load through a GEP'd pointer when the register is backed by
 * an array alloca; otherwise falls back to fetching the whole range as a
 * vector and extracting the dynamically-indexed element.
 */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			/* A 64-bit value occupies two consecutive 32-bit
			 * slots; load the high half from the next element. */
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						       val, val_hi);
		}

		return val;
	} else {
		/* Fallback: materialize the entire range as a vector and
		 * extract the element with a dynamic index. */
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}
432
/**
 * Store one channel of an indirectly-addressed register.
 *
 * Writes through a GEP'd pointer when the register is backed by an array
 * alloca; otherwise rewrites every element of the covered range via the
 * vector insert fallback.
 */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		/* Fallback: fetch the whole range as a vector, insert the new
		 * value at the dynamic index, then store every element back. */
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				/* Skip registers beyond the declared count. */
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				/* Other register files are not stored here. */
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							LLVMConstInt(ctx->i32, i, 0), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
480
481 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
482 * reload them at each use. This must be true if the shader is using
483 * derivatives and KILL, because KILL can leave the WQM and then a lazy
484 * input load isn't in the WQM anymore.
485 */
si_preload_fs_inputs(struct si_shader_context * ctx)486 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
487 {
488 struct si_shader_selector *sel = ctx->shader->selector;
489
490 return sel->info.uses_derivatives &&
491 sel->info.uses_kill;
492 }
493
/* Return the alloca pointer for the given output register channel. */
static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
	       unsigned chan)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	/* Outputs must have been declared (allocated in emit_declaration). */
	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
	return ctx->outputs[index][chan];
}
503
/**
 * Fetch one channel of a TGSI source operand (or all four when
 * swizzle == ~0) as an LLVM value of the requested TGSI type.
 *
 * \param reg      TGSI source register
 * \param type     TGSI type the result is bitcast to
 * \param swizzle  channel to fetch, or ~0 to gather all four channels
 */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	if (swizzle == ~0) {
		/* Fetch each channel recursively and gather into a vector. */
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(&ctx->gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		/* Indirect addressing has its own load path. */
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Build a <2 x i32> constant from two consecutive
			 * immediate channels, then bitcast to 64 bits. */
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			/* 64-bit inputs span two consecutive channels. */
			ptr = result;
			ptr2 = input[swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Reads of undeclared temporaries yield undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
602
/**
 * Fetch a TGSI system value, extracting the requested channel when the
 * stored value is a vector and assembling 64-bit values from two channels.
 */
static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
				       const struct tgsi_full_src_register *reg,
				       enum tgsi_opcode_type type,
				       unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef cval = ctx->system_values[reg->Register.Index];

	if (tgsi_type_is_64bit(type)) {
		LLVMValueRef lo, hi;

		/* 64-bit values occupy channel pairs (xy or zw). */
		assert(swizzle == 0 || swizzle == 2);

		lo = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
		hi = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");

		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						lo, hi);
	}

	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
		cval = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
	} else {
		/* Scalar system values only have channel 0. */
		assert(swizzle == 0);
	}

	return bitcast(bld_base, type, cval);
}
635
/**
 * Process a TGSI declaration: create f32/i32 allocas for address registers,
 * temporaries and outputs (or one packed array alloca for large indirectly
 * addressed temporary arrays), record/preload inputs, and delegate system
 * values and compute memory to their helpers.
 */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->addrs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		/* Four channels per declared register. */
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = lp_build_alloca_undef(&ctx->gallivm,
								     LLVMArrayType(ctx->f32,
										   array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the flat per-channel pointer table on the
		 * first temporary declaration. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* One scalar f32 alloca per declared channel. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(&ctx->gallivm,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, "undef");
			}

			/* Point each written channel into the packed array
			 * alloca; unwritten channels share the undef alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				/* Record a per-register copy of the
				 * declaration so each input can be
				 * (re)loaded individually later. */
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				/* Preload now unless this is an FS that
				 * reloads inputs at each use. */
				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			/* Skip outputs that were already declared. */
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}
814
/**
 * Store the destination register channels of a TGSI instruction.
 *
 * Handles vector stores (by splitting per channel and recursing),
 * saturation, address registers, indirect addressing, and 64-bit
 * destination types (which occupy two consecutive channel allocas).
 */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		/* Split the vector into scalar channels and recurse. */
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values are stored via their even channel; the odd
		 * channel only carries the high half. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				/* Writes to undeclared temporaries are dropped. */
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two i32 halves
				 * and store them in consecutive channels. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}
909
/* Subtract 1 so that the number shown is that of the corresponding
 * opcode in the TGSI dump, e.g. an if block has the same suffix as
 * the instruction number of the corresponding TGSI IF.
 */
static int get_line(int pc)
{
	return pc - 1;
}
918
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)919 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
920 struct lp_build_tgsi_context *bld_base,
921 struct lp_build_emit_data *emit_data)
922 {
923 struct si_shader_context *ctx = si_shader_context(bld_base);
924 ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
925 }
926
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)927 static void brk_emit(const struct lp_build_tgsi_action *action,
928 struct lp_build_tgsi_context *bld_base,
929 struct lp_build_emit_data *emit_data)
930 {
931 struct si_shader_context *ctx = si_shader_context(bld_base);
932 ac_build_break(&ctx->ac);
933 }
934
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)935 static void cont_emit(const struct lp_build_tgsi_action *action,
936 struct lp_build_tgsi_context *bld_base,
937 struct lp_build_emit_data *emit_data)
938 {
939 struct si_shader_context *ctx = si_shader_context(bld_base);
940 ac_build_continue(&ctx->ac);
941 }
942
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)943 static void else_emit(const struct lp_build_tgsi_action *action,
944 struct lp_build_tgsi_context *bld_base,
945 struct lp_build_emit_data *emit_data)
946 {
947 struct si_shader_context *ctx = si_shader_context(bld_base);
948 ac_build_else(&ctx->ac, get_line(bld_base->pc));
949 }
950
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)951 static void endif_emit(const struct lp_build_tgsi_action *action,
952 struct lp_build_tgsi_context *bld_base,
953 struct lp_build_emit_data *emit_data)
954 {
955 struct si_shader_context *ctx = si_shader_context(bld_base);
956 ac_build_endif(&ctx->ac, get_line(bld_base->pc));
957 }
958
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)959 static void endloop_emit(const struct lp_build_tgsi_action *action,
960 struct lp_build_tgsi_context *bld_base,
961 struct lp_build_emit_data *emit_data)
962 {
963 struct si_shader_context *ctx = si_shader_context(bld_base);
964 ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
965 }
966
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)967 static void if_emit(const struct lp_build_tgsi_action *action,
968 struct lp_build_tgsi_context *bld_base,
969 struct lp_build_emit_data *emit_data)
970 {
971 struct si_shader_context *ctx = si_shader_context(bld_base);
972 ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
973 }
974
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)975 static void uif_emit(const struct lp_build_tgsi_action *action,
976 struct lp_build_tgsi_context *bld_base,
977 struct lp_build_emit_data *emit_data)
978 {
979 struct si_shader_context *ctx = si_shader_context(bld_base);
980 ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
981 }
982
emit_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)983 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
984 const struct tgsi_full_immediate *imm)
985 {
986 unsigned i;
987 struct si_shader_context *ctx = si_shader_context(bld_base);
988
989 for (i = 0; i < 4; ++i) {
990 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
991 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
992 }
993
994 ctx->imms_num++;
995 }
996
/* One-time initialization of a shader context: create the LLVM context,
 * module, and builder, wire up the ac and gallivm helper state, and
 * register the TGSI control-flow opcode handlers. Shader-specific state
 * is set later by si_llvm_context_set_tgsi().
 */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  LLVMTargetMachineRef tm)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->tm = tm;

	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");

	/* Copy the target machine's data layout into the module so that
	 * alignment/size queries during IR construction match codegen.
	 */
	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
	LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
	LLVMDisposeTargetData(data_layout);
	LLVMDisposeMessage(data_layout_str);

	/* Pick the FP fast-math mode for the builder; the debug flag
	 * opts into fully unsafe math, otherwise only no-signed-zeros.
	 */
	bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
	enum ac_float_mode float_mode =
		unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
				AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;

	ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
						 float_mode);

	/* The ac helper context shares the module/builder created above. */
	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
			     sscreen->info.chip_class, sscreen->info.family);
	ctx->ac.module = ctx->gallivm.module;
	ctx->ac.builder = ctx->gallivm.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	/* Scalar (length 1) 32-bit float type; the uint/int variants and
	 * the 64-bit variants below are derived from it.
	 */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	type.width *= 2;  /* now 64-bit for the dbl/uint64/int64 contexts */
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	bld_base->soa = 1;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	/* metadata allowing 2.5 ULP */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->ac.context,
						       "fpmath", 6);
	LLVMValueRef arg = LLVMConstReal(ctx->ac.f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->ac.context,
						     &arg, 1);

	/* Control-flow opcodes; ALU and memory opcode tables are filled
	 * in by the init_alu/init_mem helpers below.
	 */
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);
	si_shader_context_init_mem(ctx);

	/* Cache commonly used LLVM types and constants. */
	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
}
1092
1093 /* Set the context to a certain TGSI shader. Can be called repeatedly
1094 * to change the shader. */
void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
			      struct si_shader *shader)
{
	const struct tgsi_shader_info *info = NULL;
	const struct tgsi_token *tokens = NULL;

	if (shader && shader->selector) {
		info = &shader->selector->info;
		tokens = shader->selector->tokens;
	}

	ctx->shader = shader;
	/* -1 marks "no shader bound" (e.g. when called with NULL to clean up). */
	ctx->type = info ? info->processor : -1;
	ctx->bld_base.info = info;

	/* Clean up the old contents. */
	FREE(ctx->temp_arrays);
	ctx->temp_arrays = NULL;
	FREE(ctx->temp_array_allocas);
	ctx->temp_array_allocas = NULL;

	FREE(ctx->imms);
	ctx->imms = NULL;
	ctx->imms_num = 0;

	FREE(ctx->temps);
	ctx->temps = NULL;
	ctx->temps_count = 0;

	if (!info || !tokens)
		return;

	/* Allocate per-array metadata for indexable TEMP arrays, if any. */
	if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
		int size = info->array_max[TGSI_FILE_TEMPORARY];

		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));

		tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
				 ctx->temp_arrays);
	}
	/* Reserve one LLVMValueRef per channel for each declared immediate;
	 * slots are filled lazily by emit_immediate().
	 */
	if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
	}

	/* Re-set these to start with a clean slate. */
	ctx->bld_base.num_instructions = 0;
	ctx->bld_base.pc = 0;
	memset(ctx->outputs, 0, sizeof(ctx->outputs));

	ctx->bld_base.emit_store = si_llvm_emit_store;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;

	/* Resource counts come from the declared-bit masks in the scan info. */
	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
	ctx->num_samplers = util_last_bit(info->samplers_declared);
	ctx->num_images = util_last_bit(info->images_declared);
}
1158
/* Create the main LLVM function for the current shader: build its return
 * struct and parameter list, append the entry basic block, position the
 * builder there, and set the AMDGPU calling convention matching the
 * hardware shader stage.
 */
void si_llvm_create_func(struct si_shader_context *ctx,
			 const char *name,
			 LLVMTypeRef *return_types, unsigned num_return_elems,
			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
	LLVMTypeRef main_fn_type, ret_type;
	LLVMBasicBlockRef main_fn_body;
	enum si_llvm_calling_convention call_conv;
	unsigned real_shader_type;

	/* Multiple return values are packed into a (packed) struct;
	 * a shader with none returns void.
	 */
	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx->ac.context,
						   return_types,
						   num_return_elems, true);
	else
		ret_type = ctx->voidt;

	/* Setup the function */
	ctx->return_type = ret_type;
	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
	main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
			ctx->main_fn, "main_body");
	LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);

	real_shader_type = ctx->type;

	/* LS is merged into HS (TCS), and ES is merged into GS. */
	if (ctx->screen->info.chip_class >= GFX9) {
		if (ctx->shader->key.as_ls)
			real_shader_type = PIPE_SHADER_TESS_CTRL;
		else if (ctx->shader->key.as_es)
			real_shader_type = PIPE_SHADER_GEOMETRY;
	}

	switch (real_shader_type) {
	case PIPE_SHADER_VERTEX:
	case PIPE_SHADER_TESS_EVAL:
		call_conv = RADEON_LLVM_AMDGPU_VS;
		break;
	case PIPE_SHADER_TESS_CTRL:
		/* The dedicated HS calling convention only exists in LLVM 5+. */
		call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
						  RADEON_LLVM_AMDGPU_VS;
		break;
	case PIPE_SHADER_GEOMETRY:
		call_conv = RADEON_LLVM_AMDGPU_GS;
		break;
	case PIPE_SHADER_FRAGMENT:
		call_conv = RADEON_LLVM_AMDGPU_PS;
		break;
	case PIPE_SHADER_COMPUTE:
		call_conv = RADEON_LLVM_AMDGPU_CS;
		break;
	default:
		/* Fixed typo in the diagnostic ("Unhandle" -> "Unhandled"). */
		unreachable("Unhandled shader type");
	}

	LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
}
1218
/* Run the mid-level LLVM optimization pipeline on the module built so
 * far. Also disposes the IR builder, since no more IR is emitted after
 * this point; backend code generation happens separately against the
 * target machine.
 */
void si_llvm_optimize_module(struct si_shader_context *ctx)
{
	struct gallivm_state *gallivm = &ctx->gallivm;
	const char *triple = LLVMGetTarget(gallivm->module);
	LLVMTargetLibraryInfoRef target_library_info;

	/* Dump LLVM IR before any optimization passes */
	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
	    si_can_dump_shader(ctx->screen, ctx->type))
		LLVMDumpModule(ctx->gallivm.module);

	/* Create the pass manager */
	gallivm->passmgr = LLVMCreatePassManager();

	target_library_info = gallivm_create_target_library_info(triple);
	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);

	/* Optional verification pass for debug builds / debug flags. */
	if (si_extra_shader_checks(ctx->screen, ctx->type))
		LLVMAddVerifierPass(gallivm->passmgr);

	LLVMAddAlwaysInlinerPass(gallivm->passmgr);

	/* This pass should eliminate all the load and store instructions */
	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
	LLVMAddLICMPass(gallivm->passmgr);
	LLVMAddAggressiveDCEPass(gallivm->passmgr);
	LLVMAddCFGSimplificationPass(gallivm->passmgr);
#if HAVE_LLVM >= 0x0400
	/* This is recommended by the instruction combining pass. */
	LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
#endif
	LLVMAddInstructionCombiningPass(gallivm->passmgr);

	/* Run the pass */
	LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);

	/* The builder is no longer needed once the IR is final. */
	LLVMDisposeBuilder(ctx->ac.builder);
	LLVMDisposePassManager(gallivm->passmgr);
	gallivm_dispose_target_library_info(target_library_info);
}
1262
si_llvm_dispose(struct si_shader_context * ctx)1263 void si_llvm_dispose(struct si_shader_context *ctx)
1264 {
1265 LLVMDisposeModule(ctx->gallivm.module);
1266 LLVMContextDispose(ctx->gallivm.context);
1267 FREE(ctx->temp_arrays);
1268 ctx->temp_arrays = NULL;
1269 FREE(ctx->temp_array_allocas);
1270 ctx->temp_array_allocas = NULL;
1271 FREE(ctx->temps);
1272 ctx->temps = NULL;
1273 ctx->temps_count = 0;
1274 FREE(ctx->imms);
1275 ctx->imms = NULL;
1276 ctx->imms_num = 0;
1277 ac_llvm_context_dispose(&ctx->ac);
1278 }
1279