; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck %s

; Exercises the AMDGPU promote-alloca pass on five kernels that share the same
; body: a private [5 x i32] array that is written at two dynamically loaded
; indices and read back at constant indices 0 and 1.
;
; The kernels differ only in their attribute group (see the bottom of the file):
;   * For the three promote_alloca_size_N kernels the test expects the alloca
;     to be replaced by an addrspace(3) global named "<function>.stack" whose
;     outer array dimension equals the declared maximum work group size N.
;   * For the two occupancy_* kernels the test expects the alloca to survive.
;
; NOTE: the expected global names are derived from the alloca's value name, so
; the alloca must stay named %stack in every kernel.

; Maximum work group size 63 -> expects a [63 x [5 x i32]] addrspace(3) global.
; CHECK: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4

define void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
  %stack = alloca [5 x i32], align 4
  ; Store 4 at the index loaded from in[0].
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  ; Store 5 at the index loaded from in[1].
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  ; Copy stack[0] to out[0].
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  ; Copy stack[1] to out[1].
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; Maximum work group size 256 with max waves per EU 3 -> expects a
; [256 x [5 x i32]] addrspace(3) global.
; CHECK: @promote_alloca_size_256.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4

define void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 {
entry:
  %stack = alloca [5 x i32], align 4
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; Maximum work group size 1600 with max waves per EU 1 -> expects a
; [1600 x [5 x i32]] addrspace(3) global.
; CHECK: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4

define void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
entry:
  %stack = alloca [5 x i32], align 4
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; Max waves per EU of 0: the test expects the alloca to be left in place.
; The function name is matched as a label so that the alloca check below can
; only be satisfied inside this function's body.
; CHECK-LABEL: @occupancy_0(
; CHECK: alloca [5 x i32]
define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
entry:
  %stack = alloca [5 x i32], align 4
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; Max waves per EU of -1: the test expects the alloca to be left in place.
; Matched as a label for the same reason as @occupancy_0 above.
; CHECK-LABEL: @occupancy_max(
; CHECK: alloca [5 x i32]
define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
entry:
  %stack = alloca [5 x i32], align 4
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; Attribute groups: one per kernel, selecting the work group size and/or
; waves-per-EU limit that the corresponding expectations above depend on.
attributes #0 = { nounwind "amdgpu-max-work-group-size"="63" }
attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="3" "amdgpu-max-work-group-size"="256" }
attributes #2 = { nounwind "amdgpu-max-waves-per-eu"="1" "amdgpu-max-work-group-size"="1600" }
attributes #3 = { nounwind "amdgpu-max-waves-per-eu"="0" }
attributes #4 = { nounwind "amdgpu-max-waves-per-eu"="-1" }