1/* 2 * Copyright 2024 Alyssa Rosenzweig 3 * Copyright 2024 Valve Corporation 4 * Copyright 2022 Collabora Ltd. and Red Hat Inc. 5 * SPDX-License-Identifier: MIT 6 */ 7#include "compiler/libcl/libcl.h" 8#include "query.h" 9 10static inline void 11write_query_result(uintptr_t dst_addr, int32_t idx, bool is_64, uint64_t result) 12{ 13 if (is_64) { 14 global uint64_t *out = (global uint64_t *)dst_addr; 15 out[idx] = result; 16 } else { 17 global uint32_t *out = (global uint32_t *)dst_addr; 18 out[idx] = result; 19 } 20} 21 22KERNEL(32) 23libagx_copy_query(global uint32_t *availability, global uint64_t *results, 24 global uint16_t *oq_index, uint64_t dst_addr, 25 uint64_t dst_stride, uint32_t first_query, uint16_t partial, 26 uint16_t _64, uint16_t with_availability, 27 uint16_t reports_per_query) 28{ 29 uint i = get_global_id(0); 30 uint64_t dst = dst_addr + (((uint64_t)i) * dst_stride); 31 uint32_t query = first_query + i; 32 33 bool available; 34 if (availability) 35 available = availability[query]; 36 else 37 available = (results[query] != LIBAGX_QUERY_UNAVAILABLE); 38 39 if (available || partial) { 40 /* For occlusion queries, results[] points to the device global heap. We 41 * need to remap indices according to the query pool's allocation. 42 */ 43 uint result_index = oq_index ? oq_index[query] : query; 44 uint idx = result_index * reports_per_query; 45 46 for (unsigned i = 0; i < reports_per_query; ++i) { 47 write_query_result(dst, i, _64, results[idx + i]); 48 } 49 } 50 51 if (with_availability) { 52 write_query_result(dst, reports_per_query, _64, available); 53 } 54} 55 56/* TODO: Share with Gallium... */ 57enum pipe_query_value_type { 58 PIPE_QUERY_TYPE_I32, 59 PIPE_QUERY_TYPE_U32, 60 PIPE_QUERY_TYPE_I64, 61 PIPE_QUERY_TYPE_U64, 62}; 63 64KERNEL(1) 65libagx_copy_query_gl(global uint64_t *query, global uint64_t *dest, 66 ushort value_type, ushort bool_size) 67{ 68 uint64_t value = *query; 69 70 if (bool_size == 4) { 71 value = (uint32_t)value; 72 } 73 74 if (bool_size) { 75 value = value != 0; 76 } 77 78 if (value_type <= PIPE_QUERY_TYPE_U32) { 79 global uint32_t *dest32 = (global uint32_t *)dest; 80 bool u32 = (value_type == PIPE_QUERY_TYPE_U32); 81 82 *dest32 = u32 ? convert_uint_sat(value) : convert_int_sat((int64_t)value); 83 } else { 84 *dest = value; 85 } 86} 87 88KERNEL(4) 89libagx_copy_xfb_counters(constant struct libagx_xfb_counter_copy *push) 90{ 91 unsigned i = get_local_id(0); 92 93 *(push->dest[i]) = push->src[i] ? *(push->src[i]) : 0; 94} 95 96KERNEL(1) 97libagx_increment_statistic(global uint32_t *statistic, uint32_t delta) 98{ 99 *statistic += delta; 100} 101 102KERNEL(1) 103libagx_increment_cs_invocations(global uint *grid, global uint32_t *statistic, 104 uint32_t local_size_threads) 105{ 106 *statistic += local_size_threads * grid[0] * grid[1] * grid[2]; 107} 108 109KERNEL(32) 110libagx_write_u32s(constant struct libagx_imm_write *p) 111{ 112 uint id = get_global_id(0); 113 *(p[id].address) = p[id].value; 114} 115 116/* 117 * We set the source as volatile since the caching situation around timestamps 118 * is a bit unclear. It might not be necessary but - absent hardware/firmware 119 * documentation - this gives me peace of mind. 120 */ 121KERNEL(1) 122libagx_copy_timestamp(global uint64_t *dest, volatile global uint64_t *src) 123{ 124 *dest = *src; 125} 126 127KERNEL(1) 128libagx_write_u32(global uint32_t *address, uint32_t value) 129{ 130 *address = value; 131} 132