// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#version 460

//
// Each workgroup fills up to RS_BLOCK_KEYVALS
//

// clang-format off
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_control_flow_attributes : require
// clang-format on

//
// Load arch/keyval configuration
//
#include "config.h"

//
// Buffer reference macros and push constants
//
#include "bufref.h"
#include "push.h"

//
// Subgroup uniform support
//
// RS_SUBGROUP_UNIFORM expands to the `subgroupuniformEXT` qualifier only
// when both guard macros below are defined.  Otherwise it expands to
// nothing and declarations using it are left unqualified.
//
// The #extension behavior must be spelled `require` -- the same token
// used by the other #extension directives in this file.  GLSL only
// accepts `require`, `enable`, `warn` or `disable`.
//
// NOTE(review): the guard tests that RS_SCATTER_SUBGROUP_UNIFORM_DISABLE
// is *defined*.  Confirm that a "scatter ... disable" switch is really
// the intended condition for enabling the qualifier in the fill shader.
//
#if defined(RS_SCATTER_SUBGROUP_UNIFORM_DISABLE) && defined(GL_EXT_subgroupuniform_qualifier)
#extension GL_EXT_subgroupuniform_qualifier : require
#define RS_SUBGROUP_UNIFORM subgroupuniformEXT
#else
#define RS_SUBGROUP_UNIFORM
#endif

//
// Declare the push constants
//
// The block exposes a single `rs_push_fill` member named `push`.
// (RS_STRUCT_PUSH_FILL() presumably declares that struct type -- it is
// defined outside this file.)
//
RS_STRUCT_PUSH_FILL();

layout(push_constant) uniform block_push
{
  rs_push_fill push;
};

//
// The "init" shader configures the fill info structure.
//
RS_STRUCT_INDIRECT_INFO_FILL();

//
// Every configuration switch must be provided by the build
//
#if !defined(RS_FILL_WORKGROUP_SIZE_LOG2)
#error "Undefined: RS_FILL_WORKGROUP_SIZE_LOG2"
#endif

#if !defined(RS_FILL_BLOCK_ROWS)
#error "Undefined: RS_FILL_BLOCK_ROWS"
#endif

//
// Local macros
//
// RS_WORKGROUP_SIZE : invocations per workgroup
// RS_BLOCK_DWORDS   : dwords covered by one workgroup (rows * workgroup size)
// RS_RADIX_MASK     : mask of the low RS_RADIX_LOG2 bits
//
// clang-format off
#define RS_WORKGROUP_SIZE  (1 << RS_FILL_WORKGROUP_SIZE_LOG2)
#define RS_BLOCK_DWORDS    (RS_FILL_BLOCK_ROWS * RS_WORKGROUP_SIZE)
#define RS_RADIX_MASK      ((1 << RS_RADIX_LOG2) - 1)
// clang-format on

//
// One-dimensional workgroup
//
layout(local_size_x = RS_WORKGROUP_SIZE) in;

//
// Buffer reference types: the indirect fill info and a raw dword array
//
layout(buffer_reference, std430) buffer buffer_rs_indirect_info_fill
{
  rs_indirect_info_fill info;
};

layout(buffer_reference, std430) buffer buffer_rs_dwords
{
  uint32_t extent[];
};

//
// Entry point: each invocation stores push.dword into up to
// RS_FILL_BLOCK_ROWS dwords, one per row, spaced RS_WORKGROUP_SIZE
// dwords apart.  Dwords outside the range described by the indirect
// info are left untouched.
//
void
main()
{
  //
  // Load the fill parameters through a read-only buffer reference
  //
  readonly RS_BUFREF_DEFINE(buffer_rs_indirect_info_fill, rs_info, push.devaddr_info);

  RS_SUBGROUP_UNIFORM const rs_indirect_info_fill info = rs_info.info;

  //
  // First dword handled by this invocation.
  //
  // The index is 32 bits wide.  It is widened to a 64-bit byte offset
  // by an extended multiply with 4 (bytes per uint32_t element): the
  // high half lands in .y and the low half in .x.
  //
  // Assumes push.devaddr_dwords is suitably aligned to RS_BLOCK_DWORDS
  // -- at a subgroup or transaction size is fine.
  //
  const uint32_t block_idx   = info.block_offset + gl_WorkGroupID.x;
  const uint32_t first_dword = block_idx * RS_BLOCK_DWORDS + gl_LocalInvocationID.x;

  u32vec2 byte_offset;

  umulExtended(first_dword, 4, byte_offset.y, byte_offset.x);

  //
  // Write-only view of the destination, already advanced to first_dword
  //
  writeonly RS_BUFREF_DEFINE_AT_OFFSET_U32VEC2(buffer_rs_dwords,
                                               rs_dwords,
                                               push.devaddr_dwords,
                                               byte_offset);

  //
  // Fills are always aligned to RS_BLOCK_KEYVALS
  //
  // The range test uses a single unsigned compare:
  //
  //   ((v >= min) && (v < max)) == ((v - min) < (max - min))
  //
  const uint32_t rel_dword = first_dword - info.dword_offset_min;

  [[unroll]] for (uint32_t row = 0; row < RS_FILL_BLOCK_ROWS; row++)
  {
    // Offset of this row's dword relative to first_dword
    const uint32_t row_dword = row * RS_WORKGROUP_SIZE;
    const bool     in_range  = (rel_dword + row_dword) < info.dword_offset_max_minus_min;

    if (in_range)
    {
      rs_dwords.extent[row_dword] = push.dword;
    }
  }
}

//
//
//