; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s

; Every kernel in this section loads through an addrspace(4) pointer and
; stores the (optionally extended) value through an addrspace(1) pointer.
; The FUNC prefix is shared by all four invocations above; the GCN* and EG
; prefixes carry the per-target expectations.

; Scalar i8 load, stored unchanged.
; FUNC-LABEL: {{^}}constant_load_i8:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(4)* %in
  store i8 %ld, i8 addrspace(1)* %out
  ret void
}

; <2 x i8> load, stored unchanged.
; FUNC-LABEL: {{^}}constant_load_v2i8:
; GCN-NOHSA: buffer_load_ushort v
; GCN-HSA: flat_load_ushort v

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; <3 x i8> load, stored unchanged.
; FUNC-LABEL: {{^}}constant_load_v3i8:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; <4 x i8> load, stored unchanged.
; FUNC-LABEL: {{^}}constant_load_v4i8:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
  ret void
}

; <8 x i8> load, stored unchanged.
; FUNC-LABEL: {{^}}constant_load_v8i8:
; GCN: s_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
  ret void
}

; <16 x i8> load, stored unchanged.
; FUNC-LABEL: {{^}}constant_load_v16i8:
; GCN: s_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}

; i8 load zero-extended to i32.
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; i8 load sign-extended to i32.
; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
; GCN-NOHSA: buffer_load_sbyte
; GCN-HSA: flat_load_sbyte

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %ld = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; <1 x i8> load zero-extended to <1 x i32>.
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; <1 x i8> load sign-extended to <1 x i32>.
; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; <2 x i8> load zero-extended to <2 x i32>.
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG: 8
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; <2 x i8> load sign-extended to <2 x i32>.
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort

; GCN-HSA: flat_load_ushort

; GCN: v_bfe_i32
; GCN: v_bfe_i32

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; <3 x i8> load zero-extended to <3 x i32>.
; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; GCN-DAG: s_bfe_u32
; GCN-DAG: s_bfe_u32
; GCN-DAG: s_and_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; <3 x i8> load sign-extended to <3 x i32>.
; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; GCN-DAG: s_bfe_i32
; GCN-DAG: s_bfe_i32
; GCN-DAG: s_bfe_i32

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; <4 x i8> load zero-extended to <4 x i32>.
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
210; TODO: These should use DST, but for some there are redundant MOVs 211; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 212; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 213; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 214; EG-DAG: 8 215; EG-DAG: 8 216; EG-DAG: 8 217define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { 218 %load = load <4 x i8>, <4 x i8> addrspace(4)* %in 219 %ext = zext <4 x i8> %load to <4 x i32> 220 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 221 ret void 222} 223 224; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32: 225; GCN: s_load_dword s 226; GCN-DAG: s_sext_i32_i8 227; GCN-DAG: s_ashr_i32 228 229; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1 230; TODO: These should use DST, but for some there are redundant MOVs 231; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 232; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 233; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 234; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 235; EG-DAG: 8 236; EG-DAG: 8 237; EG-DAG: 8 238; EG-DAG: 8 239define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { 240 %load = load <4 x i8>, <4 x i8> addrspace(4)* %in 241 %ext = sext <4 x i8> %load to <4 x i32> 242 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 243 ret void 244} 245 246; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32: 247; GCN: s_load_dwordx2 248; GCN-DAG: s_and_b32 249; GCN-DAG: s_lshr_b32 250 251; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 252; TODO: These should use DST, but for some there are redundant MOVs 253; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 254; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 255; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, 
{{.*}}, {{.*}}literal 256; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 257; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 258; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 259; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 260; EG-DAG: 8 261; EG-DAG: 8 262; EG-DAG: 8 263; EG-DAG: 8 264; EG-DAG: 8 265; EG-DAG: 8 266; EG-DAG: 8 267define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { 268 %load = load <8 x i8>, <8 x i8> addrspace(4)* %in 269 %ext = zext <8 x i8> %load to <8 x i32> 270 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 271 ret void 272} 273 274; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32: 275; GCN: s_load_dwordx2 276; GCN-DAG: s_ashr_i32 277; GCN-DAG: s_sext_i32_i8 278 279; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1 280; TODO: These should use DST, but for some there are redundant MOVs 281; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 282; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 283; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 284; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 285; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 286; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 287; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 288; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 289; EG-DAG: 8 290; EG-DAG: 8 291; EG-DAG: 8 292; EG-DAG: 8 293; EG-DAG: 8 294; EG-DAG: 8 295; EG-DAG: 8 296; EG-DAG: 8 297define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { 298 %load = load <8 x i8>, <8 x i8> addrspace(4)* %in 299 %ext = sext <8 x i8> %load to <8 x i32> 300 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 301 ret void 302} 303 304; FUNC-LABEL: 
{{^}}constant_zextload_v16i8_to_v16i32: 305 306; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 307; TODO: These should use DST, but for some there are redundant MOVs 308; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 309; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 310; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 311; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 312; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 313; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 314; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 315; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 316; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 317; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 318; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 319; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 320; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 321; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 322; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 323; EG-DAG: 8 324; EG-DAG: 8 325; EG-DAG: 8 326; EG-DAG: 8 327; EG-DAG: 8 328; EG-DAG: 8 329; EG-DAG: 8 330; EG-DAG: 8 331; EG-DAG: 8 332; EG-DAG: 8 333; EG-DAG: 8 334; EG-DAG: 8 335; EG-DAG: 8 336; EG-DAG: 8 337; EG-DAG: 8 338define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 { 339 %load = load <16 x i8>, <16 x i8> addrspace(4)* %in 340 %ext = zext <16 x i8> %load to <16 x i32> 341 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 342 ret void 343} 344 345; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32: 346 347; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 348; TODO: These should use DST, but for some there are redundant MOVs 349; 
EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 350; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 351; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 352; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 353; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 354; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 355; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 356; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 357; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 358; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 359; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 360; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 361; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 362; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 363; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 364; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 365; EG-DAG: 8 366; EG-DAG: 8 367; EG-DAG: 8 368; EG-DAG: 8 369; EG-DAG: 8 370; EG-DAG: 8 371; EG-DAG: 8 372; EG-DAG: 8 373; EG-DAG: 8 374; EG-DAG: 8 375; EG-DAG: 8 376; EG-DAG: 8 377; EG-DAG: 8 378; EG-DAG: 8 379; EG-DAG: 8 380; EG-DAG: 8 381define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 { 382 %load = load <16 x i8>, <16 x i8> addrspace(4)* %in 383 %ext = sext <16 x i8> %load to <16 x i32> 384 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 385 ret void 386} 387 388; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32: 389 390; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 391; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 392; TODO: These should use DST, but for some there are redundant MOVs 393; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 
{{.*}}literal 394; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 395; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 396; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 397; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 398; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 399; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 400; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 401; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 402; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 403; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 404; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 405; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 406; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 407; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 408; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 409; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 410; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 411; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 412; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 413; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 414; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 415; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 416; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 417; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 418; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 419; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 420; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 421; EG-DAG: 
BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 422; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 423; EG-DAG: 8 424; EG-DAG: 8 425; EG-DAG: 8 426; EG-DAG: 8 427; EG-DAG: 8 428; EG-DAG: 8 429; EG-DAG: 8 430; EG-DAG: 8 431; EG-DAG: 8 432; EG-DAG: 8 433; EG-DAG: 8 434; EG-DAG: 8 435; EG-DAG: 8 436; EG-DAG: 8 437; EG-DAG: 8 438; EG-DAG: 8 439; EG-DAG: 8 440; EG-DAG: 8 441; EG-DAG: 8 442; EG-DAG: 8 443; EG-DAG: 8 444; EG-DAG: 8 445; EG-DAG: 8 446; EG-DAG: 8 447; EG-DAG: 8 448; EG-DAG: 8 449; EG-DAG: 8 450; EG-DAG: 8 451; EG-DAG: 8 452; EG-DAG: 8 453define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 { 454 %load = load <32 x i8>, <32 x i8> addrspace(4)* %in 455 %ext = zext <32 x i8> %load to <32 x i32> 456 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 457 ret void 458} 459 460; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32: 461 462; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 463; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1 464; TODO: These should use DST, but for some there are redundant MOVs 465; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 466; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 467; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 468; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 469; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 470; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 471; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 472; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 473; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 474; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 475; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 476; EG-DAG: BFE_INT {{[* 
]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 477; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 478; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 479; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 480; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 481; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 482; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 483; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 484; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 485; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 486; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 487; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 488; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 489; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 490; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 491; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 492; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 493; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 494; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 495; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 496; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 497; EG-DAG: 8 498; EG-DAG: 8 499; EG-DAG: 8 500; EG-DAG: 8 501; EG-DAG: 8 502; EG-DAG: 8 503; EG-DAG: 8 504; EG-DAG: 8 505; EG-DAG: 8 506; EG-DAG: 8 507; EG-DAG: 8 508; EG-DAG: 8 509; EG-DAG: 8 510; EG-DAG: 8 511; EG-DAG: 8 512; EG-DAG: 8 513; EG-DAG: 8 514; EG-DAG: 8 515; EG-DAG: 8 516; EG-DAG: 8 517; EG-DAG: 8 518; EG-DAG: 8 519; EG-DAG: 8 520; EG-DAG: 8 521; EG-DAG: 8 522; EG-DAG: 8 523; EG-DAG: 8 524; EG-DAG: 8 525; EG-DAG: 8 526; EG-DAG: 8 527; EG-DAG: 8 528; EG-DAG: 8 529define amdgpu_kernel void 
@constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 { 530 %load = load <32 x i8>, <32 x i8> addrspace(4)* %in 531 %ext = sext <32 x i8> %load to <32 x i32> 532 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 533 ret void 534} 535 536; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32: 537 538; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1 539; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1 540; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1 541; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1 542define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 { 543 %load = load <64 x i8>, <64 x i8> addrspace(4)* %in 544 %ext = zext <64 x i8> %load to <64 x i32> 545 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 546 ret void 547} 548 549; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32: 550 551; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1 552; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1 553; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1 554; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1 555define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 { 556 %load = load <64 x i8>, <64 x i8> addrspace(4)* %in 557 %ext = sext <64 x i8> %load to <64 x i32> 558 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 559 ret void 560} 561 562; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64: 563; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} 564 565; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]], 566; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] 567 568; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]], 569; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]] 570 571; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 572; EG: MOV {{.*}}, 0.0 
; Kernel body for the i8 -> i64 zero-extending load; its check lines
; immediately precede this definition.
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; i8 load sign-extended to i64.
; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]+\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; <1 x i8> load zero-extended to <1 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <1 x i8> load sign-extended to <1 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]+\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <2 x i8> load zero-extended to <2 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <2 x i8> load sign-extended to <2 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x i8> load zero-extended to <4 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <4 x i8> load sign-extended to <4 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <8 x i8> load zero-extended to <8 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; <8 x i8> load sign-extended to <8 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; <16 x i8> load zero-extended to <16 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; <16 x i8> load sign-extended to <16 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; <32 x i8> load zero-extended to <32 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; <32 x i8> load sign-extended to <32 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; The two <64 x i8> -> <64 x i64> kernels below are commented out; XFUNC is
; not one of the check prefixes passed on the command lines of this test.
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; i8 load zero-extended to i16.
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; i8 load sign-extended to i16.
; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],

; GCN-NOHSA: buffer_store_short v[[VAL]]
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; <1 x i8> load zero-extended to <1 x i16>; only the label is checked.
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; <1 x i8> load sign-extended to <1 x i16>.
; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; <2 x i8> load zero-extended to <2 x i16>.
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i8> load sign-extended to <2 x i16>.
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; <4 x i8> load zero-extended to <4 x i16>.
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; <4 x i8> load sign-extended to <4 x i16>.
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; <8 x i8> load zero-extended to <8 x i16>.
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; <8 x i8> load sign-extended to <8 x i16>.
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal

define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; <16 x i8> load zero-extended to <16 x i16>.
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* 
]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 885; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 886; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 887; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 888; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 889; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 890; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 891; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 892define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 { 893 %load = load <16 x i8>, <16 x i8> addrspace(4)* %in 894 %ext = sext <16 x i8> %load to <16 x i16> 895 store <16 x i16> %ext, <16 x i16> addrspace(1)* %out 896 ret void 897} 898 899; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16: 900 901; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 902; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 903define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 { 904 %load = load <32 x i8>, <32 x i8> addrspace(4)* %in 905 %ext = zext <32 x i8> %load to <32 x i16> 906 store <32 x i16> %ext, <32 x i16> addrspace(1)* %out 907 ret void 908} 909 910; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16: 911 912; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 913; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 914; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 915; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 916; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 917; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 918; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 919; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 920; EG-DAG: BFE_INT {{[* 
]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 921; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 922; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 923; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 924; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 925; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 926; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 927; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 928; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 929; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 930; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 931; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 932; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 933; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 934; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 935; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 936; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 937; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 938; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 939; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 940; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 941; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 942; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 943; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 944; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 945; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 946define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 { 947 %load = load <32 x i8>, <32 x i8> addrspace(4)* %in 948 %ext = sext <32 
x i8> %load to <32 x i16> 949 store <32 x i16> %ext, <32 x i16> addrspace(1)* %out 950 ret void 951} 952 953; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16: 954; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 { 955; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in 956; %ext = zext <64 x i8> %load to <64 x i16> 957; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out 958; ret void 959; } 960 961; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16: 962; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 { 963; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in 964; %ext = sext <64 x i8> %load to <64 x i16> 965; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out 966; ret void 967; } 968 969attributes #0 = { nounwind } 970