Lines Matching refs:sctx
224 struct si_context *sctx = (struct si_context *)ctx; in si_create_compute_state() local
251 sel->compiler_ctx_state.debug = sctx->debug; in si_create_compute_state()
252 sel->compiler_ctx_state.is_debug_context = sctx->is_debug; in si_create_compute_state()
255 si_schedule_initial_compile(sctx, MESA_SHADER_COMPUTE, &sel->ready, &sel->compiler_ctx_state, in si_create_compute_state()
272 si_shader_dump(sctx->screen, &program->shader, &sctx->debug, stderr, true); in si_create_compute_state()
273 if (!si_shader_binary_upload(sctx->screen, &program->shader, 0)) { in si_create_compute_state()
286 struct si_context *sctx = (struct si_context *)ctx; in si_bind_compute_state() local
290 sctx->cs_shader_state.program = program; in si_bind_compute_state()
298 si_set_active_descriptors(sctx, in si_bind_compute_state()
301 si_set_active_descriptors(sctx, SI_DESCS_FIRST_COMPUTE + SI_SHADER_DESCS_SAMPLERS_AND_IMAGES, in si_bind_compute_state()
304 sctx->compute_shaderbuf_sgprs_dirty = true; in si_bind_compute_state()
305 sctx->compute_image_sgprs_dirty = true; in si_bind_compute_state()
312 struct si_context *sctx = (struct si_context *)ctx; in si_set_global_binding() local
313 struct si_compute *program = sctx->cs_shader_state.program; in si_set_global_binding()
348 void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs) in si_emit_initial_compute_regs() argument
350 uint64_t bc_va = sctx->border_color_buffer->gpu_address; in si_emit_initial_compute_regs()
358 if (sctx->chip_class == GFX6) { in si_emit_initial_compute_regs()
368 if (sctx->screen->info.si_TA_CS_BC_BASE_ADDR_allowed) in si_emit_initial_compute_regs()
372 if (sctx->chip_class >= GFX7) { in si_emit_initial_compute_regs()
379 if (cs != sctx->gfx_cs || !sctx->screen->info.has_graphics) { in si_emit_initial_compute_regs()
393 if (sctx->chip_class >= GFX9 && in si_emit_initial_compute_regs()
394 (cs != sctx->gfx_cs || !sctx->screen->info.has_graphics)) { in si_emit_initial_compute_regs()
396 sctx->chip_class >= GFX10 ? 0x20 : 0); in si_emit_initial_compute_regs()
399 if (sctx->chip_class >= GFX10) { in si_emit_initial_compute_regs()
409 static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader, in si_setup_compute_scratch_buffer() argument
414 scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves; in si_setup_compute_scratch_buffer()
415 if (sctx->compute_scratch_buffer) in si_setup_compute_scratch_buffer()
416 scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0; in si_setup_compute_scratch_buffer()
419 si_resource_reference(&sctx->compute_scratch_buffer, NULL); in si_setup_compute_scratch_buffer()
421 sctx->compute_scratch_buffer = in si_setup_compute_scratch_buffer()
422 si_aligned_buffer_create(&sctx->screen->b, in si_setup_compute_scratch_buffer()
425 scratch_needed, sctx->screen->info.pte_fragment_size); in si_setup_compute_scratch_buffer()
427 if (!sctx->compute_scratch_buffer) in si_setup_compute_scratch_buffer()
431 if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) { in si_setup_compute_scratch_buffer()
432 uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address; in si_setup_compute_scratch_buffer()
434 if (!si_shader_binary_upload(sctx->screen, shader, scratch_va)) in si_setup_compute_scratch_buffer()
437 si_resource_reference(&shader->scratch_bo, sctx->compute_scratch_buffer); in si_setup_compute_scratch_buffer()
443 static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute *program, in si_switch_compute_shader() argument
447 struct radeon_cmdbuf *cs = sctx->gfx_cs; in si_switch_compute_shader()
454 if (sctx->cs_shader_state.emitted_program == program && sctx->cs_shader_state.offset == offset) in si_switch_compute_shader()
471 if (sctx->chip_class <= GFX6) { in si_switch_compute_shader()
484 if (!si_setup_compute_scratch_buffer(sctx, shader, config)) in si_switch_compute_shader()
488 COMPUTE_DBG(sctx->screen, in si_switch_compute_shader()
491 sctx->scratch_waves, config->scratch_bytes_per_wave, in si_switch_compute_shader()
492 config->scratch_bytes_per_wave * sctx->scratch_waves); in si_switch_compute_shader()
494 radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->scratch_bo, RADEON_USAGE_READWRITE, in si_switch_compute_shader()
505 radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->bo, RADEON_USAGE_READ, in si_switch_compute_shader()
516 COMPUTE_DBG(sctx->screen, in si_switch_compute_shader()
521 sctx->max_seen_compute_scratch_bytes_per_wave = in si_switch_compute_shader()
522 MAX2(sctx->max_seen_compute_scratch_bytes_per_wave, config->scratch_bytes_per_wave); in si_switch_compute_shader()
525 S_00B860_WAVES(sctx->scratch_waves) | in si_switch_compute_shader()
526 S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10)); in si_switch_compute_shader()
528 sctx->cs_shader_state.emitted_program = program; in si_switch_compute_shader()
529 sctx->cs_shader_state.offset = offset; in si_switch_compute_shader()
530 sctx->cs_shader_state.uses_scratch = config->scratch_bytes_per_wave != 0; in si_switch_compute_shader()
536 static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx, in setup_scratch_rsrc_user_sgprs() argument
539 struct radeon_cmdbuf *cs = sctx->gfx_cs; in setup_scratch_rsrc_user_sgprs()
540 uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address; in setup_scratch_rsrc_user_sgprs()
553 if (sctx->chip_class >= GFX9) { in setup_scratch_rsrc_user_sgprs()
558 if (sctx->chip_class < GFX8) { in setup_scratch_rsrc_user_sgprs()
572 static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_code_t *code_object, in si_setup_user_sgprs_co_v2() argument
575 struct si_compute *program = sctx->cs_shader_state.program; in si_setup_user_sgprs_co_v2()
576 struct radeon_cmdbuf *cs = sctx->gfx_cs; in si_setup_user_sgprs_co_v2()
587 setup_scratch_rsrc_user_sgprs(sctx, code_object, user_sgpr); in si_setup_user_sgprs_co_v2()
614 u_upload_data(sctx->b.const_uploader, 0, sizeof(dispatch), 256, &dispatch, &dispatch_offset, in si_setup_user_sgprs_co_v2()
621 radeon_add_to_buffer_list(sctx, sctx->gfx_cs, dispatch_buf, RADEON_USAGE_READ, in si_setup_user_sgprs_co_v2()
651 static bool si_upload_compute_input(struct si_context *sctx, const amd_kernel_code_t *code_object, in si_upload_compute_input() argument
654 struct si_compute *program = sctx->cs_shader_state.program; in si_upload_compute_input()
661 u_upload_alloc(sctx->b.const_uploader, 0, program->input_size, in si_upload_compute_input()
662 sctx->screen->info.tcc_cache_line_size, &kernel_args_offset, in si_upload_compute_input()
674 COMPUTE_DBG(sctx->screen, "input %u : %u\n", i, kernel_args[i]); in si_upload_compute_input()
677 radeon_add_to_buffer_list(sctx, sctx->gfx_cs, input_buffer, RADEON_USAGE_READ, in si_upload_compute_input()
680 si_setup_user_sgprs_co_v2(sctx, code_object, info, kernel_args_va); in si_upload_compute_input()
685 static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_grid_info *info) in si_setup_nir_user_data() argument
687 struct si_compute *program = sctx->cs_shader_state.program; in si_setup_nir_user_data()
689 struct radeon_cmdbuf *cs = sctx->gfx_cs; in si_setup_nir_user_data()
699 si_cp_copy_data(sctx, sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i, in si_setup_nir_user_data()
720 radeon_emit_array(cs, sctx->cs_user_data, sel->info.base.cs.user_data_components_amd); in si_setup_nir_user_data()
724 static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_grid_info *info) in si_emit_dispatch_packets() argument
726 struct si_screen *sscreen = sctx->screen; in si_emit_dispatch_packets()
727 struct radeon_cmdbuf *cs = sctx->gfx_cs; in si_emit_dispatch_packets()
728 bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off; in si_emit_dispatch_packets()
734 if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1) in si_emit_dispatch_packets()
740 sctx->cs_max_waves_per_sh, threadgroups_per_cu)); in si_emit_dispatch_packets()
745 S_00B800_ORDER_MODE(sctx->chip_class >= GFX7) | in si_emit_dispatch_packets()
778 radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(info->indirect), RADEON_USAGE_READ, in si_emit_dispatch_packets()
800 struct si_context *sctx = (struct si_context *)ctx; in si_launch_grid() local
801 struct si_compute *program = sctx->cs_shader_state.program; in si_launch_grid()
811 (sctx->chip_class == GFX6 || sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KABINI) && in si_launch_grid()
815 sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; in si_launch_grid()
820 if (sctx->has_graphics) { in si_launch_grid()
821 if (sctx->last_num_draw_calls != sctx->num_draw_calls) { in si_launch_grid()
822 si_update_fb_dirtiness_after_rendering(sctx); in si_launch_grid()
823 sctx->last_num_draw_calls = sctx->num_draw_calls; in si_launch_grid()
826 si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE); in si_launch_grid()
830 si_context_add_resource_size(sctx, &program->shader.bo->b.b); in si_launch_grid()
834 si_context_add_resource_size(sctx, info->indirect); in si_launch_grid()
837 if (sctx->chip_class <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { in si_launch_grid()
838 sctx->flags |= SI_CONTEXT_WB_L2; in si_launch_grid()
843 si_need_gfx_cs_space(sctx, 0); in si_launch_grid()
846 if (unlikely(radeon_uses_secure_bos(sctx->ws))) { in si_launch_grid()
847 bool secure = si_compute_resources_check_encrypted(sctx); in si_launch_grid()
848 if (secure != sctx->ws->cs_is_secure(sctx->gfx_cs)) { in si_launch_grid()
849 si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW | in si_launch_grid()
855 if (sctx->bo_list_add_all_compute_resources) in si_launch_grid()
856 si_compute_resources_add_all_to_bo_list(sctx); in si_launch_grid()
858 if (!sctx->cs_shader_state.initialized) { in si_launch_grid()
859 si_emit_initial_compute_regs(sctx, sctx->gfx_cs); in si_launch_grid()
861 sctx->cs_shader_state.emitted_program = NULL; in si_launch_grid()
862 sctx->cs_shader_state.initialized = true; in si_launch_grid()
867 if (!si_switch_compute_shader(sctx, program, &program->shader, code_object, info->pc, &prefetch)) in si_launch_grid()
870 si_upload_compute_shader_descriptors(sctx); in si_launch_grid()
871 si_emit_compute_shader_pointers(sctx); in si_launch_grid()
874 unlikely(!si_upload_compute_input(sctx, code_object, info))) in si_launch_grid()
883 radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer, RADEON_USAGE_READWRITE, in si_launch_grid()
888 if (sctx->flags) in si_launch_grid()
889 sctx->emit_cache_flush(sctx); in si_launch_grid()
891 if (sctx->has_graphics && si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) { in si_launch_grid()
892 sctx->atoms.s.render_cond.emit(sctx); in si_launch_grid()
893 si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false); in si_launch_grid()
897 if (sctx->chip_class >= GFX7 && prefetch) in si_launch_grid()
898 cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0); in si_launch_grid()
901 si_setup_nir_user_data(sctx, info); in si_launch_grid()
903 si_emit_dispatch_packets(sctx, info); in si_launch_grid()
905 if (unlikely(sctx->current_saved_cs)) { in si_launch_grid()
906 si_trace_emit(sctx); in si_launch_grid()
907 si_log_compute_state(sctx, sctx->log); in si_launch_grid()
910 sctx->compute_is_busy = true; in si_launch_grid()
911 sctx->num_compute_calls++; in si_launch_grid()
912 if (sctx->cs_shader_state.uses_scratch) in si_launch_grid()
913 sctx->num_spill_compute_calls++; in si_launch_grid()
916 sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; in si_launch_grid()
940 struct si_context *sctx = (struct si_context *)ctx; in si_delete_compute_state() local
945 if (program == sctx->cs_shader_state.program) in si_delete_compute_state()
946 sctx->cs_shader_state.program = NULL; in si_delete_compute_state()
948 if (program == sctx->cs_shader_state.emitted_program) in si_delete_compute_state()
949 sctx->cs_shader_state.emitted_program = NULL; in si_delete_compute_state()
959 void si_init_compute_functions(struct si_context *sctx) in si_init_compute_functions() argument
961 sctx->b.create_compute_state = si_create_compute_state; in si_init_compute_functions()
962 sctx->b.delete_compute_state = si_delete_compute_state; in si_init_compute_functions()
963 sctx->b.bind_compute_state = si_bind_compute_state; in si_init_compute_functions()
964 sctx->b.set_compute_resources = si_set_compute_resources; in si_init_compute_functions()
965 sctx->b.set_global_binding = si_set_global_binding; in si_init_compute_functions()
966 sctx->b.launch_grid = si_launch_grid; in si_init_compute_functions()