1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s 3; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s 4 5define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { 6 ; HSA-VI-LABEL: name: i8_arg 7 ; HSA-VI: bb.1 (%ir-block.0): 8 ; HSA-VI: liveins: $sgpr4_sgpr5 9 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 10 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 11 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 12 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 13 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 14 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 15 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 16 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 17 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 18 ; HSA-VI: S_ENDPGM 0 19 ; LEGACY-MESA-VI-LABEL: name: i8_arg 20 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 21 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 22 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 23 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 24 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 25 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 26 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 27 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 28 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 29 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 30 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 31 ; LEGACY-MESA-VI: S_ENDPGM 0 32 %ext = zext i8 %in to i32 33 store i32 %ext, i32 addrspace(1)* %out, align 4 34 ret void 35} 36 37define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { 38 ; HSA-VI-LABEL: name: i8_zext_arg 39 ; HSA-VI: bb.1 (%ir-block.0): 40 ; HSA-VI: liveins: $sgpr4_sgpr5 41 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 42 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 43 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 44 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 45 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 46 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 47 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 48 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 49 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 50 ; HSA-VI: S_ENDPGM 0 51 ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg 52 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 53 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 54 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 55 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 56 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 57 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 58 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 59 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 60 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 61 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 62 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 63 ; LEGACY-MESA-VI: S_ENDPGM 0 64 %ext = zext i8 %in to i32 65 store i32 %ext, i32 addrspace(1)* %out, align 4 66 ret void 67} 68 69define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { 70 ; HSA-VI-LABEL: name: i8_sext_arg 71 ; HSA-VI: bb.1 (%ir-block.0): 72 ; HSA-VI: liveins: $sgpr4_sgpr5 73 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 74 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 75 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 76 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 77 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 78 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 79 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 80 ; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) 81 ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 82 ; HSA-VI: S_ENDPGM 0 83 ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg 84 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 85 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 86 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 87 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 88 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 89 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 90 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 91 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 92 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 93 ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) 94 ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 95 ; LEGACY-MESA-VI: S_ENDPGM 0 96 %ext = sext i8 %in to i32 97 store i32 %ext, i32 addrspace(1)* %out, align 4 98 ret void 99} 100 101define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { 102 ; HSA-VI-LABEL: name: i16_arg 103 ; HSA-VI: bb.1 (%ir-block.0): 104 ; HSA-VI: liveins: $sgpr4_sgpr5 105 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 106 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 107 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 108 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 109 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 110 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 111 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) 112 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 113 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 114 ; HSA-VI: S_ENDPGM 0 115 ; LEGACY-MESA-VI-LABEL: name: i16_arg 116 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 117 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 118 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 119 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 120 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 121 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 122 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 123 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 124 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) 125 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 126 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 127 ; LEGACY-MESA-VI: S_ENDPGM 0 128 %ext = zext i16 %in to i32 129 store i32 %ext, i32 addrspace(1)* %out, align 4 130 ret void 131} 132 133define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { 134 ; HSA-VI-LABEL: name: i16_zext_arg 135 ; HSA-VI: bb.1 (%ir-block.0): 136 ; HSA-VI: liveins: $sgpr4_sgpr5 137 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 138 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 139 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 140 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 141 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 142 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 143 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) 144 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 145 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 146 ; HSA-VI: S_ENDPGM 0 147 ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg 148 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 149 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 150 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 151 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 152 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 153 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 154 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 155 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 156 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) 157 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 158 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 159 ; LEGACY-MESA-VI: S_ENDPGM 0 160 %ext = zext i16 %in to i32 161 store i32 %ext, i32 addrspace(1)* %out, align 4 162 ret void 163} 164 165define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { 166 ; HSA-VI-LABEL: name: i16_sext_arg 167 ; HSA-VI: bb.1 (%ir-block.0): 168 ; HSA-VI: liveins: $sgpr4_sgpr5 169 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 170 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 171 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 172 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 173 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 174 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 175 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) 176 ; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) 177 ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 178 ; HSA-VI: S_ENDPGM 0 179 ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg 180 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 181 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 182 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 183 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 184 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 185 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 186 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 187 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 188 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) 189 ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) 190 ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 191 ; LEGACY-MESA-VI: S_ENDPGM 0 192 %ext = sext i16 %in to i32 193 store i32 %ext, i32 addrspace(1)* %out, align 4 194 ret void 195} 196 197define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { 198 ; HSA-VI-LABEL: name: i32_arg 199 ; HSA-VI: bb.1.entry: 200 ; HSA-VI: liveins: $sgpr4_sgpr5 201 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 202 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 203 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 204 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 205 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 206 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 207 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) 208 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 209 ; HSA-VI: S_ENDPGM 0 210 ; LEGACY-MESA-VI-LABEL: name: i32_arg 211 ; LEGACY-MESA-VI: bb.1.entry: 212 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 213 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 214 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 215 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 216 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 217 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 218 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 219 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) 220 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 221 ; LEGACY-MESA-VI: S_ENDPGM 0 222entry: 223 store i32 %in, i32 addrspace(1)* %out, align 4 224 ret void 225} 226 227define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { 228 ; HSA-VI-LABEL: name: f32_arg 229 ; HSA-VI: bb.1.entry: 230 ; HSA-VI: liveins: $sgpr4_sgpr5 231 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 232 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 233 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 234 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 235 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 236 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 237 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) 238 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 239 ; HSA-VI: S_ENDPGM 0 240 ; LEGACY-MESA-VI-LABEL: name: f32_arg 241 ; LEGACY-MESA-VI: bb.1.entry: 242 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 243 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 244 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 245 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 246 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 247 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 248 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 249 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) 250 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 251 ; LEGACY-MESA-VI: S_ENDPGM 0 252entry: 253 store float %in, float addrspace(1)* %out, align 4 254 ret void 255} 256 257define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { 258 ; HSA-VI-LABEL: name: v2i8_arg 259 ; HSA-VI: bb.1.entry: 260 ; HSA-VI: liveins: $sgpr4_sgpr5 261 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 262 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 263 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 264 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 265 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 266 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 267 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) 268 ; HSA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1) 269 ; HSA-VI: S_ENDPGM 0 270 ; LEGACY-MESA-VI-LABEL: name: v2i8_arg 271 ; LEGACY-MESA-VI: bb.1.entry: 272 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 273 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 274 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 275 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 276 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 277 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 278 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 279 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) 280 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1) 281 ; LEGACY-MESA-VI: S_ENDPGM 0 282entry: 283 store <2 x i8> %in, <2 x i8> addrspace(1)* %out 284 ret void 285} 286 287define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { 288 ; HSA-VI-LABEL: name: v2i16_arg 289 ; HSA-VI: bb.1.entry: 290 ; HSA-VI: liveins: $sgpr4_sgpr5 291 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 292 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 293 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 294 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 295 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 296 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 297 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) 298 ; HSA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 299 ; HSA-VI: S_ENDPGM 0 300 ; LEGACY-MESA-VI-LABEL: name: v2i16_arg 301 ; LEGACY-MESA-VI: bb.1.entry: 302 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 303 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 304 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 305 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 306 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 307 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 308 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 309 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) 310 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 311 ; LEGACY-MESA-VI: S_ENDPGM 0 312entry: 313 store <2 x i16> %in, <2 x i16> addrspace(1)* %out 314 ret void 315} 316 317define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { 318 ; HSA-VI-LABEL: name: v2i32_arg 319 ; HSA-VI: bb.1.entry: 320 ; HSA-VI: liveins: $sgpr4_sgpr5 321 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 322 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 323 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 324 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 325 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 326 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 327 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) 328 ; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) 329 ; HSA-VI: S_ENDPGM 0 330 ; LEGACY-MESA-VI-LABEL: name: v2i32_arg 331 ; LEGACY-MESA-VI: bb.1.entry: 332 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 333 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 334 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 335 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 336 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 337 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 338 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 339 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 340 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) 341 ; LEGACY-MESA-VI: S_ENDPGM 0 342entry: 343 store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 344 ret void 345} 346 347define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { 348 ; HSA-VI-LABEL: name: v2f32_arg 349 ; HSA-VI: bb.1.entry: 350 ; HSA-VI: liveins: $sgpr4_sgpr5 351 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 352 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 353 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 354 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 355 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 356 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 357 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) 358 ; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) 359 ; HSA-VI: S_ENDPGM 0 360 ; LEGACY-MESA-VI-LABEL: name: v2f32_arg 361 ; LEGACY-MESA-VI: bb.1.entry: 362 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 363 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 364 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 365 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 366 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 367 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 368 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 369 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 370 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) 371 ; LEGACY-MESA-VI: S_ENDPGM 0 372entry: 373 store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 374 ret void 375} 376 377define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { 378 ; HSA-VI-LABEL: name: v3i8_arg 379 ; HSA-VI: bb.1.entry: 380 ; HSA-VI: liveins: $sgpr4_sgpr5 381 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 382 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 383 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 384 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 385 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 386 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 387 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 3, align 8, addrspace 4) 388 ; HSA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1) 389 ; HSA-VI: S_ENDPGM 0 390 ; LEGACY-MESA-VI-LABEL: name: v3i8_arg 391 ; LEGACY-MESA-VI: bb.1.entry: 392 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 393 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 394 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 395 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 396 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 397 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 398 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 399 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 3, align 4, addrspace 4) 400 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1) 401 ; LEGACY-MESA-VI: S_ENDPGM 0 402entry: 403 store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 404 ret void 405} 406 407define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { 408 ; HSA-VI-LABEL: name: v3i16_arg 409 ; HSA-VI: bb.1.entry: 410 ; HSA-VI: liveins: $sgpr4_sgpr5 411 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 412 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 413 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 414 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 415 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 416 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 417 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 6, align 8, addrspace 4) 418 ; HSA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1) 419 ; HSA-VI: S_ENDPGM 0 420 ; LEGACY-MESA-VI-LABEL: name: v3i16_arg 421 ; LEGACY-MESA-VI: bb.1.entry: 422 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 423 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 424 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 425 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 426 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 427 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 428 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 429 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 6, align 4, addrspace 4) 430 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1) 431 ; LEGACY-MESA-VI: S_ENDPGM 0 432entry: 433 store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 434 ret void 435} 436 437define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { 438 ; HSA-VI-LABEL: name: v3i32_arg 439 ; HSA-VI: bb.1.entry: 440 ; HSA-VI: liveins: $sgpr4_sgpr5 441 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 442 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 443 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 444 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 445 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 446 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 447 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4) 448 ; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) 449 ; HSA-VI: S_ENDPGM 0 450 ; LEGACY-MESA-VI-LABEL: name: v3i32_arg 451 ; LEGACY-MESA-VI: bb.1.entry: 452 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 453 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 454 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 455 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 456 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 457 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 458 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 459 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4) 460 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) 461 ; LEGACY-MESA-VI: S_ENDPGM 0 462entry: 463 store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 464 ret void 465} 466 467define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { 468 ; HSA-VI-LABEL: name: v3f32_arg 469 ; HSA-VI: bb.1.entry: 470 ; HSA-VI: liveins: $sgpr4_sgpr5 471 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 472 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 473 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 474 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 475 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 476 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 477 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4) 478 ; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) 479 ; HSA-VI: S_ENDPGM 0 480 ; LEGACY-MESA-VI-LABEL: name: v3f32_arg 481 ; LEGACY-MESA-VI: bb.1.entry: 482 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 483 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 484 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 485 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 486 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 487 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 488 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 489 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4) 490 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) 491 ; LEGACY-MESA-VI: S_ENDPGM 0 492entry: 493 store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 494 ret void 495} 496 497define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { 498 ; HSA-VI-LABEL: name: v4i8_arg 499 ; HSA-VI: bb.1.entry: 500 ; HSA-VI: liveins: $sgpr4_sgpr5 501 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 502 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 503 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 504 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 505 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 506 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 507 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) 508 ; HSA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 509 ; HSA-VI: S_ENDPGM 0 510 ; LEGACY-MESA-VI-LABEL: name: v4i8_arg 511 ; LEGACY-MESA-VI: bb.1.entry: 512 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 513 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 514 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 515 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 516 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 517 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 518 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 519 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) 520 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 521 ; LEGACY-MESA-VI: S_ENDPGM 0 522entry: 523 store <4 x i8> %in, <4 x i8> addrspace(1)* %out 524 ret void 525} 526 527define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { 528 ; HSA-VI-LABEL: name: v4i16_arg 529 ; HSA-VI: bb.1.entry: 530 ; HSA-VI: liveins: $sgpr4_sgpr5 531 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 532 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 533 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 534 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 535 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 536 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 537 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) 538 ; HSA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 539 ; HSA-VI: S_ENDPGM 0 540 ; LEGACY-MESA-VI-LABEL: name: v4i16_arg 541 ; LEGACY-MESA-VI: bb.1.entry: 542 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 543 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 544 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 545 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 546 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 547 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 548 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 549 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 550 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 551 ; LEGACY-MESA-VI: S_ENDPGM 0 552entry: 553 store <4 x i16> %in, <4 x i16> addrspace(1)* %out 554 ret void 555} 556 557define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { 558 ; HSA-VI-LABEL: name: v4i32_arg 559 ; HSA-VI: bb.1.entry: 560 ; HSA-VI: liveins: $sgpr4_sgpr5 561 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 562 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 563 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 564 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 565 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 566 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 567 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) 568 ; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) 569 ; HSA-VI: S_ENDPGM 0 570 ; LEGACY-MESA-VI-LABEL: name: v4i32_arg 571 ; LEGACY-MESA-VI: bb.1.entry: 572 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 573 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 574 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 575 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 576 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 577 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 578 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 579 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) 580 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) 581 ; LEGACY-MESA-VI: S_ENDPGM 0 582entry: 583 store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 584 ret void 585} 586 587define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { 588 ; HSA-VI-LABEL: name: v4f32_arg 589 ; HSA-VI: bb.1.entry: 590 ; HSA-VI: liveins: $sgpr4_sgpr5 591 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 592 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 593 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 594 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 595 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 596 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 597 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) 598 ; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) 599 ; HSA-VI: S_ENDPGM 0 600 ; LEGACY-MESA-VI-LABEL: name: v4f32_arg 601 ; LEGACY-MESA-VI: bb.1.entry: 602 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 603 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 604 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 605 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 606 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 607 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 608 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 609 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) 610 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) 611 ; LEGACY-MESA-VI: S_ENDPGM 0 612entry: 613 store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 614 ret void 615} 616 617define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { 618 ; HSA-VI-LABEL: name: v8i8_arg 619 ; HSA-VI: bb.1.entry: 620 ; HSA-VI: liveins: $sgpr4_sgpr5 621 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 622 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 623 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 624 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 625 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 626 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 627 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) 628 ; HSA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 629 ; HSA-VI: S_ENDPGM 0 630 ; LEGACY-MESA-VI-LABEL: name: v8i8_arg 631 ; LEGACY-MESA-VI: bb.1.entry: 632 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 633 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 634 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 635 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 636 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 637 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 638 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 639 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 640 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 641 ; LEGACY-MESA-VI: S_ENDPGM 0 642entry: 643 store <8 x i8> %in, <8 x i8> addrspace(1)* %out 644 ret void 645} 646 647define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { 648 ; HSA-VI-LABEL: name: v8i16_arg 649 ; HSA-VI: bb.1.entry: 650 ; HSA-VI: liveins: $sgpr4_sgpr5 651 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 652 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 653 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 654 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 655 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 656 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 657 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) 658 ; HSA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) 659 ; HSA-VI: S_ENDPGM 0 660 ; LEGACY-MESA-VI-LABEL: name: v8i16_arg 661 ; LEGACY-MESA-VI: bb.1.entry: 662 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 663 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 664 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 665 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 666 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 667 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 668 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 669 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) 670 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) 671 ; LEGACY-MESA-VI: S_ENDPGM 0 672entry: 673 store <8 x i16> %in, <8 x i16> addrspace(1)* %out 674 ret void 675} 676 677define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { 678 ; HSA-VI-LABEL: name: v8i32_arg 679 ; HSA-VI: bb.1.entry: 680 ; HSA-VI: liveins: $sgpr4_sgpr5 681 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 682 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 683 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 684 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 685 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 686 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 687 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4) 688 ; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) 689 ; HSA-VI: S_ENDPGM 0 690 ; LEGACY-MESA-VI-LABEL: name: v8i32_arg 691 ; LEGACY-MESA-VI: bb.1.entry: 692 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 693 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 694 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 695 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 696 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 697 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 698 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 699 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4) 700 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) 701 ; LEGACY-MESA-VI: S_ENDPGM 0 702entry: 703 store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 704 ret void 705} 706 707define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { 708 ; HSA-VI-LABEL: name: v8f32_arg 709 ; HSA-VI: bb.1.entry: 710 ; HSA-VI: liveins: $sgpr4_sgpr5 711 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 712 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 713 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 714 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 715 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 716 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 717 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4) 718 ; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) 719 ; HSA-VI: S_ENDPGM 0 720 ; LEGACY-MESA-VI-LABEL: name: v8f32_arg 721 ; LEGACY-MESA-VI: bb.1.entry: 722 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 723 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 724 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 725 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 726 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 727 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 728 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 729 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4) 730 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) 731 ; LEGACY-MESA-VI: S_ENDPGM 0 732entry: 733 store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 734 ret void 735} 736 737define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { 738 ; HSA-VI-LABEL: name: v16i8_arg 739 ; HSA-VI: bb.1.entry: 740 ; HSA-VI: liveins: $sgpr4_sgpr5 741 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 742 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 743 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 744 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 745 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 746 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 747 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) 748 ; HSA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) 749 ; HSA-VI: S_ENDPGM 0 750 ; LEGACY-MESA-VI-LABEL: name: v16i8_arg 751 ; LEGACY-MESA-VI: bb.1.entry: 752 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 753 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 754 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 755 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 756 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 757 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 758 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 759 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) 760 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) 761 ; LEGACY-MESA-VI: S_ENDPGM 0 762entry: 763 store <16 x i8> %in, <16 x i8> addrspace(1)* %out 764 ret void 765} 766 767define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { 768 ; HSA-VI-LABEL: name: v16i16_arg 769 ; HSA-VI: bb.1.entry: 770 ; HSA-VI: liveins: $sgpr4_sgpr5 771 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 772 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 773 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 774 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 775 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 776 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 777 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4) 778 ; HSA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1) 779 ; HSA-VI: S_ENDPGM 0 780 ; LEGACY-MESA-VI-LABEL: name: v16i16_arg 781 ; LEGACY-MESA-VI: bb.1.entry: 782 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 783 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 784 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 785 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 786 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 787 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 788 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 789 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4) 790 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1) 791 ; LEGACY-MESA-VI: S_ENDPGM 0 792entry: 793 store <16 x i16> %in, <16 x i16> addrspace(1)* %out 794 ret void 795} 796 797define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { 798 ; HSA-VI-LABEL: name: v16i32_arg 799 ; HSA-VI: bb.1.entry: 800 ; HSA-VI: liveins: $sgpr4_sgpr5 801 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 802 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 803 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 804 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 805 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 806 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 807 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 16, addrspace 4) 808 ; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) 809 ; HSA-VI: S_ENDPGM 0 810 ; LEGACY-MESA-VI-LABEL: name: v16i32_arg 811 ; LEGACY-MESA-VI: bb.1.entry: 812 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 813 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 814 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 815 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 816 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 817 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 818 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 819 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 4, addrspace 4) 820 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) 821 ; LEGACY-MESA-VI: S_ENDPGM 0 822entry: 823 store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 824 ret void 825} 826 827define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { 828 ; HSA-VI-LABEL: name: v16f32_arg 829 ; HSA-VI: bb.1.entry: 830 ; HSA-VI: liveins: $sgpr4_sgpr5 831 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 832 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 833 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 834 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 835 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 836 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 837 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 16, addrspace 4) 838 ; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) 839 ; HSA-VI: S_ENDPGM 0 840 ; LEGACY-MESA-VI-LABEL: name: v16f32_arg 841 ; LEGACY-MESA-VI: bb.1.entry: 842 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 843 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 844 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 845 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 846 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 847 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 848 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 849 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 4, addrspace 4) 850 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) 851 ; LEGACY-MESA-VI: S_ENDPGM 0 852entry: 853 store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 854 ret void 855} 856 857define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { 858 ; HSA-VI-LABEL: name: kernel_arg_i64 859 ; HSA-VI: bb.1 (%ir-block.0): 860 ; HSA-VI: liveins: $sgpr4_sgpr5 861 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 862 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 863 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 864 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 865 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 866 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 867 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) 868 ; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 869 ; HSA-VI: S_ENDPGM 0 870 ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64 871 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 872 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 873 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 874 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 875 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 876 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 877 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 878 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 879 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 880 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 881 ; LEGACY-MESA-VI: S_ENDPGM 0 882 store i64 %a, i64 addrspace(1)* %out, align 8 883 ret void 884} 885 886define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) { 887 ; HSA-VI-LABEL: name: f64_kernel_arg 888 ; HSA-VI: bb.1.entry: 889 ; HSA-VI: liveins: $sgpr4_sgpr5 890 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 891 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 892 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 893 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 894 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 895 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 896 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) 897 ; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 898 ; HSA-VI: S_ENDPGM 0 899 ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg 900 ; LEGACY-MESA-VI: bb.1.entry: 901 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 902 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 903 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 904 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 905 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 906 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 907 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 908 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 909 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 910 ; LEGACY-MESA-VI: S_ENDPGM 0 911entry: 912 store double %in, double addrspace(1)* %out 913 ret void 914} 915 916define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { 917 ; HSA-VI-LABEL: name: i1_arg 918 ; HSA-VI: bb.1 (%ir-block.0): 919 ; HSA-VI: liveins: $sgpr4_sgpr5 920 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 921 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 922 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 923 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 924 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 925 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 926 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 927 ; HSA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1) 928 ; HSA-VI: S_ENDPGM 0 929 ; LEGACY-MESA-VI-LABEL: name: i1_arg 930 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 931 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 932 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 933 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 934 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 935 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 936 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 937 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 938 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 939 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1) 940 ; LEGACY-MESA-VI: S_ENDPGM 0 941 store i1 %x, i1 addrspace(1)* %out, align 1 942 ret void 943} 944 945define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { 946 ; HSA-VI-LABEL: name: i1_arg_zext_i32 947 ; HSA-VI: bb.1 (%ir-block.0): 948 ; HSA-VI: liveins: $sgpr4_sgpr5 949 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 950 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 951 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 952 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 953 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 954 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 955 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 956 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) 957 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 958 ; HSA-VI: S_ENDPGM 0 959 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32 960 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 961 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 962 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 963 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 964 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 965 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 966 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 967 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 968 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 969 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) 970 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 971 ; LEGACY-MESA-VI: S_ENDPGM 0 972 %ext = zext i1 %x to i32 973 store i32 %ext, i32 addrspace(1)* %out, align 4 974 ret void 975} 976 977define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { 978 ; HSA-VI-LABEL: name: i1_arg_zext_i64 979 ; HSA-VI: bb.1 (%ir-block.0): 980 ; HSA-VI: liveins: $sgpr4_sgpr5 981 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 982 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 983 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 984 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 985 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 986 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 987 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 988 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) 989 ; HSA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 990 ; HSA-VI: S_ENDPGM 0 991 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64 992 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 993 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 994 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 995 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 996 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 997 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 998 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 999 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1000 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 1001 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) 1002 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 1003 ; LEGACY-MESA-VI: S_ENDPGM 0 1004 %ext = zext i1 %x to i64 1005 store i64 %ext, i64 addrspace(1)* %out, align 8 1006 ret void 1007} 1008 1009define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { 1010 ; HSA-VI-LABEL: name: i1_arg_sext_i32 1011 ; HSA-VI: bb.1 (%ir-block.0): 1012 ; HSA-VI: liveins: $sgpr4_sgpr5 1013 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1014 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1015 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1016 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1017 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1018 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1019 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 1020 ; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) 1021 ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1022 ; HSA-VI: S_ENDPGM 0 1023 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32 1024 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1025 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1026 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1027 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1028 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1029 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1030 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1031 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1032 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 1033 ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) 1034 ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1035 ; LEGACY-MESA-VI: S_ENDPGM 0 1036 %ext = sext i1 %x to i32 1037 store i32 %ext, i32addrspace(1)* %out, align 4 1038 ret void 1039} 1040 1041define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { 1042 ; HSA-VI-LABEL: name: i1_arg_sext_i64 1043 ; HSA-VI: bb.1 (%ir-block.0): 1044 ; HSA-VI: liveins: $sgpr4_sgpr5 1045 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1046 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1047 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1048 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1049 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1050 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1051 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) 1052 ; HSA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) 1053 ; HSA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 1054 ; HSA-VI: S_ENDPGM 0 1055 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64 1056 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1057 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1058 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1059 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1060 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1061 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1062 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1063 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1064 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 1065 ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) 1066 ; LEGACY-MESA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) 1067 ; LEGACY-MESA-VI: S_ENDPGM 0 1068 %ext = sext i1 %x to i64 1069 store i64 %ext, i64 addrspace(1)* %out, align 8 1070 ret void 1071} 1072 1073; 0-sized arguments do not add a slot to the argument register set, so 1074; waste an index in the virtual register array. 1075define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { 1076 ; HSA-VI-LABEL: name: empty_struct_arg 1077 ; HSA-VI: bb.1 (%ir-block.0): 1078 ; HSA-VI: liveins: $sgpr4_sgpr5 1079 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1080 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1081 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1082 ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) 1083 ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1084 ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1085 ; HSA-VI: S_ENDPGM 0 1086 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg 1087 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1088 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1089 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1090 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1091 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1092 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1093 ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1094 ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1095 ; LEGACY-MESA-VI: S_ENDPGM 0 1096 store i32 %arg1, i32 addrspace(1)* undef 1097 ret void 1098} 1099 1100define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { 1101 ; HSA-VI-LABEL: name: empty_array_arg 1102 ; HSA-VI: bb.1 (%ir-block.0): 1103 ; HSA-VI: liveins: $sgpr4_sgpr5 1104 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1105 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1106 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1107 ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) 1108 ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1109 ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1110 ; HSA-VI: S_ENDPGM 0 1111 ; LEGACY-MESA-VI-LABEL: name: empty_array_arg 1112 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1113 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1114 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1115 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1116 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1117 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1118 ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1119 ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1120 ; LEGACY-MESA-VI: S_ENDPGM 0 1121 store i32 %arg1, i32 addrspace(1)* undef 1122 ret void 1123} 1124 1125; The correct load offsets for these: 1126; load 4 from 0, 1127; load 8 from 8 1128; load 4 from 24 1129; load 8 from 32 1130 1131; With the SelectionDAG argument lowering, the alignments for the 1132; struct members is not properly considered, making these wrong. 1133define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) { 1134 ; HSA-VI-LABEL: name: struct_argument_alignment 1135 ; HSA-VI: bb.1 (%ir-block.0): 1136 ; HSA-VI: liveins: $sgpr4_sgpr5 1137 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1138 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1139 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1140 ; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 16, addrspace 4) 1141 ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0 1142 ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64 1143 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1144 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1145 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 16, addrspace 4) 1146 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 1147 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1148 ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 16, align 8, addrspace 4) 1149 ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0 1150 ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64 1151 ; HSA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1152 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1) 1153 ; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1) 1154 ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1155 ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1156 ; HSA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1) 1157 ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1158 ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1159 ; HSA-VI: S_ENDPGM 0 1160 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment 1161 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1162 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1163 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1164 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1165 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1166 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) 1167 ; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0 1168 ; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64 1169 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1170 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1171 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) 1172 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 1173 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1174 ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) 1175 ; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0 1176 ; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64 1177 ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1178 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1) 1179 ; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1) 1180 ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1181 ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1182 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1) 1183 ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1184 ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1185 ; LEGACY-MESA-VI: S_ENDPGM 0 1186 %val0 = extractvalue {i32, i64} %arg0, 0 1187 %val1 = extractvalue {i32, i64} %arg0, 1 1188 %val2 = extractvalue {i32, i64} %arg1, 0 1189 %val3 = extractvalue {i32, i64} %arg1, 1 1190 store volatile i32 %val0, i32 addrspace(1)* null 1191 store volatile i64 %val1, i64 addrspace(1)* null 1192 store volatile i8 %pad, i8 addrspace(1)* null 1193 store volatile i32 %val2, i32 addrspace(1)* null 1194 store volatile i64 %val3, i64 addrspace(1)* null 1195 ret void 1196} 1197 1198; No padding between i8 and next struct, but round up at end to 4 byte 1199; multiple. 1200define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) { 1201 ; HSA-VI-LABEL: name: packed_struct_argument_alignment 1202 ; HSA-VI: bb.1 (%ir-block.1): 1203 ; HSA-VI: liveins: $sgpr4_sgpr5 1204 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1205 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1206 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1207 ; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4) 1208 ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0 1209 ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 1210 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 1211 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1212 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) 1213 ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0 1214 ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32 1215 ; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1216 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1) 1217 ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1218 ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1219 ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1220 ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1221 ; HSA-VI: S_ENDPGM 0 1222 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment 1223 ; LEGACY-MESA-VI: bb.1 (%ir-block.1): 1224 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1225 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1226 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1227 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1228 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4) 1229 ; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0 1230 ; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 1231 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 1232 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1233 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) 1234 ; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0 1235 ; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32 1236 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1237 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1) 1238 ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1239 ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1240 ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) 1241 ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) 1242 ; LEGACY-MESA-VI: S_ENDPGM 0 1243 %val0 = extractvalue <{i32, i64}> %arg0, 0 1244 %val1 = extractvalue <{i32, i64}> %arg0, 1 1245 %val2 = extractvalue <{i32, i64}> %arg1, 0 1246 %val3 = extractvalue <{i32, i64}> %arg1, 1 1247 store volatile i32 %val0, i32 addrspace(1)* null 1248 store volatile i64 %val1, i64 addrspace(1)* null 1249 store volatile i32 %val2, i32 addrspace(1)* null 1250 store volatile i64 %val3, i64 addrspace(1)* null 1251 ret void 1252} 1253 1254define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind { 1255 ; HSA-VI-LABEL: name: unused_i32_arg 1256 ; HSA-VI: bb.1.entry: 1257 ; HSA-VI: liveins: $sgpr4_sgpr5 1258 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1259 ; HSA-VI: S_ENDPGM 0 1260 ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg 1261 ; LEGACY-MESA-VI: bb.1.entry: 1262 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1263 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1264 ; LEGACY-MESA-VI: S_ENDPGM 0 1265entry: 1266 ret void 1267} 1268 1269; Byref pointers should only be treated as offsets from kernarg 1270define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) { 1271 ; HSA-VI-LABEL: name: byref_constant_i8_arg 1272 ; HSA-VI: bb.1 (%ir-block.0): 1273 ; HSA-VI: liveins: $sgpr4_sgpr5 1274 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1275 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1276 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1277 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1278 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1279 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1280 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4) 1281 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 1282 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1283 ; HSA-VI: S_ENDPGM 0 1284 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg 1285 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1286 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1287 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1288 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1289 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1290 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1291 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1292 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1293 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4) 1294 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 1295 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1296 ; LEGACY-MESA-VI: S_ENDPGM 0 1297 %in = load i8, i8 addrspace(4)* %in.byref 1298 %ext = zext i8 %in to i32 1299 store i32 %ext, i32 addrspace(1)* %out, align 4 1300 ret void 1301} 1302 1303define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) { 1304 ; HSA-VI-LABEL: name: byref_constant_i16_arg 1305 ; HSA-VI: bb.1 (%ir-block.0): 1306 ; HSA-VI: liveins: $sgpr4_sgpr5 1307 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1308 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1309 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1310 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1311 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1312 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1313 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4) 1314 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 1315 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1316 ; HSA-VI: S_ENDPGM 0 1317 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg 1318 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1319 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1320 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1321 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1322 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1323 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1324 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1325 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1326 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4) 1327 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 1328 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1329 ; LEGACY-MESA-VI: S_ENDPGM 0 1330 %in = load i16, i16 addrspace(4)* %in.byref 1331 %ext = zext i16 %in to i32 1332 store i32 %ext, i32 addrspace(1)* %out, align 4 1333 ret void 1334} 1335 1336define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) { 1337 ; HSA-VI-LABEL: name: byref_constant_i32_arg 1338 ; HSA-VI: bb.1 (%ir-block.0): 1339 ; HSA-VI: liveins: $sgpr4_sgpr5 1340 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1341 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1342 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1343 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1344 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1345 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1346 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 1347 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1348 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1349 ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) 1350 ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1351 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1352 ; HSA-VI: S_ENDPGM 0 1353 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg 1354 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1355 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1356 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1357 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1358 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1359 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1360 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1361 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1362 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 1363 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1364 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) 1365 ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) 1366 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1367 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1368 ; LEGACY-MESA-VI: S_ENDPGM 0 1369 %in = load i32, i32 addrspace(4)* %in.byref 1370 store volatile i32 %in, i32 addrspace(1)* %out, align 4 1371 store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 1372 ret void 1373} 1374 1375define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) { 1376 ; HSA-VI-LABEL: name: byref_constant_v4i32_arg 1377 ; HSA-VI: bb.1 (%ir-block.0): 1378 ; HSA-VI: liveins: $sgpr4_sgpr5 1379 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1380 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1381 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1382 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1383 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1384 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1385 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 1386 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1387 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) 1388 ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4) 1389 ; HSA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1) 1390 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1) 1391 ; HSA-VI: S_ENDPGM 0 1392 ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg 1393 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1394 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1395 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1396 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1397 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1398 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1399 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1400 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1401 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 1402 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1403 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1404 ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4) 1405 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1) 1406 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1) 1407 ; LEGACY-MESA-VI: S_ENDPGM 0 1408 %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref 1409 store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 1410 %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)* 1411 store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4 1412 ret void 1413} 1414 1415define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) { 1416 ; HSA-VI-LABEL: name: byref_align_constant_i32_arg 1417 ; HSA-VI: bb.1 (%ir-block.0): 1418 ; HSA-VI: liveins: $sgpr4_sgpr5 1419 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1420 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1421 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1422 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1423 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 1424 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1425 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 1426 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1427 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1428 ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) 1429 ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1430 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1431 ; HSA-VI: S_ENDPGM 0 1432 ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg 1433 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1434 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1435 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1436 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1437 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1438 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1439 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292 1440 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1441 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 1442 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1443 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) 1444 ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) 1445 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1446 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1447 ; LEGACY-MESA-VI: S_ENDPGM 0 1448 %in = load i32, i32 addrspace(4)* %in.byref 1449 store volatile i32 %in, i32 addrspace(1)* %out, align 4 1450 store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 1451 ret void 1452} 1453 1454define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { 1455 ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg 1456 ; HSA-VI: bb.1 (%ir-block.1): 1457 ; HSA-VI: liveins: $sgpr4_sgpr5 1458 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1459 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1460 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1461 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1462 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 1463 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1464 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 1465 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1466 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) 1467 ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4) 1468 ; HSA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1) 1469 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1470 ; HSA-VI: S_ENDPGM 0 1471 ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg 1472 ; LEGACY-MESA-VI: bb.1 (%ir-block.1): 1473 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1474 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1475 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1476 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1477 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1478 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 1479 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1480 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 1481 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1482 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1483 ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4) 1484 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1) 1485 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1486 ; LEGACY-MESA-VI: S_ENDPGM 0 1487 %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref 1488 %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)* 1489 store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4 1490 store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 1491 ret void 1492} 1493 1494; Also accept byref kernel arguments with other global address spaces. 1495define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) { 1496 ; HSA-VI-LABEL: name: byref_global_i32_arg 1497 ; HSA-VI: bb.1 (%ir-block.0): 1498 ; HSA-VI: liveins: $sgpr4_sgpr5 1499 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1500 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1501 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1502 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1503 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1504 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1505 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1506 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1) 1507 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1) 1508 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1509 ; HSA-VI: S_ENDPGM 0 1510 ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg 1511 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1512 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1513 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1514 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1515 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1516 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1517 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1518 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1519 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1520 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1) 1521 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1) 1522 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1523 ; LEGACY-MESA-VI: S_ENDPGM 0 1524 %in = load i32, i32 addrspace(1)* %in.byref 1525 store i32 %in, i32 addrspace(1)* %out, align 4 1526 ret void 1527} 1528 1529define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) { 1530 ; HSA-VI-LABEL: name: byref_flat_i32_arg 1531 ; HSA-VI: bb.1 (%ir-block.0): 1532 ; HSA-VI: liveins: $sgpr4_sgpr5 1533 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1534 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1535 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1536 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1537 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1538 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1539 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1540 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref) 1541 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1542 ; HSA-VI: S_ENDPGM 0 1543 ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg 1544 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1545 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1546 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1547 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1548 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1549 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1550 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1551 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1552 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1553 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref) 1554 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1555 ; LEGACY-MESA-VI: S_ENDPGM 0 1556 %in = load i32, i32* %in.byref 1557 store i32 %in, i32 addrspace(1)* %out, align 4 1558 ret void 1559} 1560 1561define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) { 1562 ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg 1563 ; HSA-VI: bb.1 (%ir-block.0): 1564 ; HSA-VI: liveins: $sgpr4_sgpr5 1565 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1566 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1567 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1568 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1569 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1570 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1571 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1572 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6) 1573 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1574 ; HSA-VI: S_ENDPGM 0 1575 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg 1576 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1577 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1578 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1579 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1580 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1581 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1582 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1583 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1584 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1585 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6) 1586 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1587 ; LEGACY-MESA-VI: S_ENDPGM 0 1588 %in = load i32, i32 addrspace(6)* %in.byref 1589 store i32 %in, i32 addrspace(1)* %out, align 4 1590 ret void 1591} 1592 1593define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) { 1594 ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg 1595 ; HSA-VI: bb.1 (%ir-block.0): 1596 ; HSA-VI: liveins: $sgpr4_sgpr5 1597 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1598 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1599 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1600 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1601 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1602 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1603 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1604 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999) 1605 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1606 ; HSA-VI: S_ENDPGM 0 1607 ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg 1608 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1609 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1610 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1611 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1612 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1613 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1614 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1615 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1616 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1617 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999) 1618 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1619 ; LEGACY-MESA-VI: S_ENDPGM 0 1620 %in = load i32, i32 addrspace(999)* %in.byref 1621 store i32 %in, i32 addrspace(1)* %out, align 4 1622 ret void 1623} 1624 1625; Invalid, but should not crash. 1626define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) { 1627 ; HSA-VI-LABEL: name: byref_local_i32_arg 1628 ; HSA-VI: bb.1 (%ir-block.0): 1629 ; HSA-VI: liveins: $sgpr4_sgpr5 1630 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1631 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1632 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1633 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1634 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1635 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1636 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1637 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3) 1638 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1639 ; HSA-VI: S_ENDPGM 0 1640 ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg 1641 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1642 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1643 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1644 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1645 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1646 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1647 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1648 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1649 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1650 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3) 1651 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) 1652 ; LEGACY-MESA-VI: S_ENDPGM 0 1653 %in = load i32, i32 addrspace(3)* %in.byref 1654 store i32 %in, i32 addrspace(1)* %out, align 4 1655 ret void 1656} 1657 1658define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) { 1659 ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg 1660 ; HSA-VI: bb.1 (%ir-block.0): 1661 ; HSA-VI: liveins: $sgpr4_sgpr5 1662 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1663 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1664 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1665 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) 1666 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1667 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1668 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 1669 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1670 ; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1671 ; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1672 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) 1673 ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4) 1674 ; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4) 1675 ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1676 ; HSA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1677 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1678 ; HSA-VI: S_ENDPGM 0 1679 ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg 1680 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1681 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1682 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1683 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1684 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1685 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) 1686 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1687 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1688 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 1689 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1690 ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1691 ; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1692 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, addrspace 4) 1693 ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4) 1694 ; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4) 1695 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1696 ; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1697 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) 1698 ; LEGACY-MESA-VI: S_ENDPGM 0 1699 %in0 = load i32, i32 addrspace(4)* %in0.byref 1700 %in1 = load i32, i32 addrspace(4)* %in1.byref 1701 store volatile i32 %in0, i32 addrspace(1)* %out, align 4 1702 store volatile i32 %in1, i32 addrspace(1)* %out, align 4 1703 store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 1704 ret void 1705} 1706 1707define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) { 1708 ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0 1709 ; HSA-VI: bb.1 (%ir-block.0): 1710 ; HSA-VI: liveins: $sgpr4_sgpr5 1711 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1712 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1713 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1714 ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1715 ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) 1716 ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1717 ; HSA-VI: S_ENDPGM 0 1718 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 1719 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1720 ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 1721 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 1722 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1723 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1724 ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1725 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) 1726 ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1727 ; LEGACY-MESA-VI: S_ENDPGM 0 1728 %in = load i32, i32 addrspace(4)* %in.byref 1729 store i32 %in, i32 addrspace(1)* undef, align 4 1730 ret void 1731} 1732