#!amber
# Copyright 2024 The Amber Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# This benchmark tests the latency and throughput of
# shared memory vs ssbo (main cached) memory.
# Configs to manually modify:
#  - Comment in/out declaration            : Shared vs ssbo
#  - local_size_x (workgroup size)         : Single SM throughput
#  - number of loop unrolls                : latency of single thread
#  - compute/dispatch size (currently 1)   : Device throughput
#
# How it works: every invocation performs a chain of loads in which each
# load's index is the value returned by the previous load, so the loads
# cannot be overlapped. All values involved are zero at run time (the
# buffers below are FILL 0 and the shader zeroes the shared slot it reads),
# so the chain keeps hitting element 0 of the memory under test.


SHADER compute workgroup_shared_vs_ssbo GLSL
#version 430

layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// The shared array is sized from gl_WorkGroupSize (a compile-time constant
// once local_size_* has been declared above) so that the per-invocation
// store at the end of main() stays in bounds when local_size_x is raised.
// It never shrinks below the original 8 elements.
// Comment in/out these two lines to test shared memory
struct BlockB { uint data[max(gl_WorkGroupSize.x, 8u)]; }; shared BlockB ssbo_wr;
//layout(set = 0, binding = 0) buffer BlockB { uint data[];} ssbo_wr;

// Zero-filled input buffer. Reading the start index from it makes the
// index a run-time value from the compiler's point of view.
layout(set = 0, binding = 1) buffer BlockA {
  uint data[];
} ssbo_fake_volatile;


void main() {
  // This is required when using shared memory
  // (shared variables have no defined initial contents; only element 0 is
  // ever read by the chain below, so only element 0 is initialised).
  if( gl_LocalInvocationID.x == 0){
    ssbo_wr.data[0] = 0;
  }
  barrier();

  // Start index comes from the zero-filled buffer, i.e. 0 at run time.
  uint fv = ssbo_fake_volatile.data[0];
  uint iter_val = ssbo_wr.data[fv];

  // 1000 iterations x 20 manually unrolled dependent loads.
  for(uint i = 0;i<1000;i++){
    // 10x
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];

    // 10x
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
    iter_val = ssbo_wr.data[iter_val];
  }

  // Store the final value, one slot per invocation; presumably this keeps
  // the load chain from being optimised away as dead code.
  ssbo_wr.data[gl_LocalInvocationID.x] = iter_val;
}
END

# Backing store for the ssbo variant (binding 0); it must hold at least
# local_size_x elements because every invocation writes its own slot.
BUFFER buf_uint DATA_TYPE uint32 SIZE 1024 FILL 0
# Zero-filled buffer the shader reads its start index from (binding 1).
BUFFER buf_fake_volatile DATA_TYPE uint32 SIZE 1048576 FILL 0

PIPELINE compute pipeline
  ATTACH workgroup_shared_vs_ssbo
  BIND BUFFER buf_uint AS storage DESCRIPTOR_SET 0 BINDING 0
  BIND BUFFER buf_fake_volatile AS storage DESCRIPTOR_SET 0 BINDING 1
END

# Issue the timed single-workgroup dispatch 333 times.
REPEAT 333
RUN TIMED_EXECUTION pipeline 1 1 1
END