; RUN: opt -S -instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s

; Tests that instcombine narrows AMDGPU buffer-load intrinsic calls when only
; some elements of the loaded vector are used. Each function is preceded by
; the FileCheck directives describing the expected output for that function.

; --------------------------------------------------------------------
; llvm.amdgcn.buffer.load
; --------------------------------------------------------------------

; Loads whose whole result is used must be left untouched. When only some
; elements are used, the call is narrowed to the smallest scalar/vector type
; that still covers the highest used element; the operands are not modified.

; CHECK-LABEL: @buffer_load_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  ret float %data
}

; CHECK-LABEL: @buffer_load_v1f32(
; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <1 x float> %data
define amdgpu_ps <1 x float> @buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  ret <1 x float> %data
}

; CHECK-LABEL: @buffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  ret <2 x float> %data
}

; CHECK-LABEL: @buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <4 x float> %data
define amdgpu_ps <4 x float> @buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  ret <4 x float> %data
}

; CHECK-LABEL: @extract_elt0_buffer_load_v2f32(
; CHECK: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_buffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_buffer_load_v4f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_buffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt1 = extractelement <4 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt3_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt1 = extractelement <4 x float> %data, i32 3
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <2 x float>
define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt2_elt3_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_elt3_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt2_elt3_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt0 = extractelement <2 x float> %data, i32 0
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0
; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
; CHECK-NEXT: ret { float, float } %ins1
define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt1 = extractelement <4 x float> %data, i32 1
  %ins0 = insertvalue { float, float } undef, float %elt0, 0
  %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
  ret { float, float } %ins1
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_2(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt0 = extractelement <3 x float> %data, i32 0
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1
; CHECK-NEXT: %elt2 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
; CHECK-NEXT: %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
; CHECK-NEXT: %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
; CHECK-NEXT: ret { float, float, float } %ins2
define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt1 = extractelement <4 x float> %data, i32 1
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
  %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
  %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
  ret { float, float, float } %ins2
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 undef, i32 1>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 4, i32 1>
  %ret = fadd <2 x float> %ins1, %shuf
  ret <2 x float> %ret
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
; CHECK-NEXT: ret <2 x float> %ret
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <4 x float> %data, i32 0
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
  %shuf = shufflevector <4 x float> undef, <4 x float> %data, <2 x i32> <i32 5, i32 1>
  %ret = fadd <2 x float> %ins1, %shuf
  ret <2 x float> %ret
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 2, i32 2>
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt2 = extractelement <4 x float> %data, i32 2
  %ins0 = insertelement <2 x float> undef, float %elt2, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
  %shuf = shufflevector <4 x float> %data, <4 x float> %data, <2 x i32> <i32 0, i32 5>
  %ret = fadd <2 x float> %ins1, %shuf
  ret <2 x float> %ret
}

; CHECK-LABEL: @extract_elt0_buffer_load_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt0 = extractelement <3 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt1 = extractelement <3 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_buffer_load_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %elt1 = extractelement <3 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <2 x float>
define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @preserve_metadata_extract_elt0_buffer_load_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
; CHECK-NEXT: ret float %data
define amdgpu_ps float @preserve_metadata_extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1
declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1
declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1

; --------------------------------------------------------------------
; llvm.amdgcn.buffer.load.format
; --------------------------------------------------------------------

; Same narrowing for the format variant. The two trailing i1 operands are
; expected to be carried over to the narrowed call unchanged.

; CHECK-LABEL: @buffer_load_format_v1f32(
; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
; CHECK-NEXT: ret <1 x float> %data
define amdgpu_ps <1 x float> @buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
  ret <1 x float> %data
}

; CHECK-LABEL: @extract_elt0_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; The initial insertion point is at the extractelement
; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32(
; CHECK-NEXT: %tmp = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <2 x double>
; CHECK-NEXT: %tmp2 = extractelement <2 x double> %tmp1, i32 0
; CHECK-NEXT: ret double %tmp2
define double @extract01_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
  %tmp1 = bitcast <4 x float> %tmp to <2 x double>
  %tmp2 = extractelement <2 x double> %tmp1, i32 0
  ret double %tmp2
}

; CHECK-LABEL: @extract0_bitcast_buffer_load_format_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
; CHECK-NEXT: ret i32 %tmp2
define i32 @extract0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
  ret i32 %tmp2
}

; CHECK-LABEL: @extract_lo16_0_bitcast_buffer_load_format_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
; CHECK-NEXT: %1 = bitcast float %tmp to i32
; CHECK-NEXT: %tmp2 = trunc i32 %1 to i16
; CHECK-NEXT: ret i16 %tmp2
define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
  %tmp1 = bitcast <4 x float> %tmp to <8 x i16>
  %tmp2 = extractelement <8 x i16> %tmp1, i32 0
  ret i16 %tmp2
}

declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
declare <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32>, i32, i32, i1, i1) #1
declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1

; --------------------------------------------------------------------
; llvm.amdgcn.raw.buffer.load
; --------------------------------------------------------------------

; For the raw variant, unused trailing elements are trimmed as above and
; unused leading elements are skipped as well: the expected output adds 4 to
; the %ofs operand for every dropped leading float. A use set with a hole
; (elements 0, 2 and 3) is expected to keep the full v4 load.

; CHECK-LABEL: @raw_buffer_load_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret float %data
}

; CHECK-LABEL: @raw_buffer_load_v1f32(
; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <1 x float> %data
define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret <1 x float> %data
}

; CHECK-LABEL: @raw_buffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret <2 x float> %data
}

; CHECK-LABEL: @raw_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <4 x float> %data
define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret <4 x float> %data
}

; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 8
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 12
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 3
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float>
define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 8
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <3 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
; CHECK-NEXT: %1 = add i32 %ofs, 8
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float>
define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
; CHECK-NEXT: ret i32 %tmp2
define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
  ret i32 %tmp2
}

; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
; CHECK-NEXT: ret float %tmp2
define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
  %tmp2 = extractelement <4 x float> %tmp1, i32 0
  ret float %tmp2
}

; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
; CHECK-NEXT: ret float %data
define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1
declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1
declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1

declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1

; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f16(
; CHECK: %data = call half
@llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 625; CHECK-NEXT: ret half %data 626define amdgpu_ps half @extract_elt0_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 627 %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 628 %elt0 = extractelement <2 x half> %data, i32 0 629 ret half %elt0 630} 631 632; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f16( 633; CHECK-NEXT: %1 = add i32 %ofs, 2 634; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 635; CHECK-NEXT: ret half %data 636define amdgpu_ps half @extract_elt1_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 637 %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 638 %elt1 = extractelement <2 x half> %data, i32 1 639 ret half %elt1 640} 641 642; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f16( 643; CHECK-NEXT: %1 = add i32 %ofs, 2 644; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 645; CHECK-NEXT: ret half %data 646define amdgpu_ps half @extract_elt1_raw_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 647 %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 648 %elt0 = extractelement <3 x half> %data, i32 1 649 ret half %elt0 650} 651 652; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f16( 653; CHECK-NEXT: %1 = add i32 %ofs, 2 654; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 655; CHECK-NEXT: ret half %data 656define amdgpu_ps half @extract_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 657 %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 658 %elt1 = extractelement <4 x half> %data, i32 1 659 ret half %elt1 
660} 661 662; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f16( 663; CHECK-NEXT: %1 = add i32 %ofs, 6 664; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 665; CHECK-NEXT: ret half %data 666define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 667 %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 668 %elt1 = extractelement <4 x half> %data, i32 3 669 ret half %elt1 670} 671 672; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f16( 673; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 674; CHECK-NEXT: ret <2 x half> 675define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 676 %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 677 %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1> 678 ret <2 x half> %shuf 679} 680 681declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #1 682declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #1 683declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32) #1 684declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #1 685 686; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2i8( 687; CHECK: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 688; CHECK-NEXT: ret i8 %data 689define amdgpu_ps i8 @extract_elt0_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 690 %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 691 %elt0 = extractelement <2 x i8> %data, i32 0 692 ret i8 %elt0 693} 694 695; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2i8( 696; CHECK-NEXT: %1 = 
add i32 %ofs, 1 697; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 698; CHECK-NEXT: ret i8 %data 699define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 700 %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 701 %elt1 = extractelement <2 x i8> %data, i32 1 702 ret i8 %elt1 703} 704 705; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3i8( 706; CHECK-NEXT: %1 = add i32 %ofs, 1 707; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 708; CHECK-NEXT: ret i8 %data 709define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 710 %data = call <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 711 %elt0 = extractelement <3 x i8> %data, i32 1 712 ret i8 %elt0 713} 714 715; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4i8( 716; CHECK-NEXT: %1 = add i32 %ofs, 1 717; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 718; CHECK-NEXT: ret i8 %data 719define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 720 %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 721 %elt1 = extractelement <4 x i8> %data, i32 1 722 ret i8 %elt1 723} 724 725; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4i8( 726; CHECK-NEXT: %1 = add i32 %ofs, 3 727; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0) 728; CHECK-NEXT: ret i8 %data 729define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 730 %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 731 %elt1 = extractelement <4 x i8> %data, i32 3 732 ret i8 %elt1 733} 734 735; 
CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4i8( 736; CHECK-NEXT: %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 737; CHECK-NEXT: ret <2 x i8> 738define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { 739 %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) 740 %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1> 741 ret <2 x i8> %shuf 742} 743 744declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #1 745declare <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32>, i32, i32, i32) #1 746declare <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32>, i32, i32, i32) #1 747declare <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32>, i32, i32, i32) #1 748 749; -------------------------------------------------------------------- 750; llvm.amdgcn.s.buffer.load 751; -------------------------------------------------------------------- 752 753; CHECK-LABEL: @s_buffer_load_f32( 754; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 755; CHECK-NEXT: ret float %data 756define amdgpu_ps float @s_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 757 %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 758 ret float %data 759} 760 761; CHECK-LABEL: @s_buffer_load_v2f32( 762; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 763; CHECK-NEXT: ret <2 x float> %data 764define amdgpu_ps <2 x float> @s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 765 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 766 ret <2 x float> %data 767} 768 769; CHECK-LABEL: @s_buffer_load_v4f32( 770; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 771; CHECK-NEXT: 
ret <4 x float> %data 772define amdgpu_ps <4 x float> @s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 773 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 774 ret <4 x float> %data 775} 776 777; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f32( 778; CHECK: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 779; CHECK-NEXT: ret float %data 780define amdgpu_ps float @extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 781 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 782 %elt0 = extractelement <2 x float> %data, i32 0 783 ret float %elt0 784} 785 786; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f32( 787; CHECK-NEXT: %1 = add i32 %ofs, 4 788; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0) 789; CHECK-NEXT: ret float %data 790define amdgpu_ps float @extract_elt1_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 791 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 792 %elt1 = extractelement <2 x float> %data, i32 1 793 ret float %elt1 794} 795 796; CHECK-LABEL: @extract_elt0_s_buffer_load_v4f32( 797; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 798; CHECK-NEXT: ret float %data 799define amdgpu_ps float @extract_elt0_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 800 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 801 %elt0 = extractelement <4 x float> %data, i32 0 802 ret float %elt0 803} 804 805; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f32( 806; CHECK-NEXT: %1 = add i32 %ofs, 4 807; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0) 808; CHECK-NEXT: ret float %data 809define amdgpu_ps float @extract_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 810 %data = call 
<4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 811 %elt1 = extractelement <4 x float> %data, i32 1 812 ret float %elt1 813} 814 815; CHECK-LABEL: @extract_elt2_s_buffer_load_v4f32( 816; CHECK-NEXT: %1 = add i32 %ofs, 8 817; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0) 818; CHECK-NEXT: ret float %data 819define amdgpu_ps float @extract_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 820 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 821 %elt1 = extractelement <4 x float> %data, i32 2 822 ret float %elt1 823} 824 825; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f32( 826; CHECK-NEXT: %1 = add i32 %ofs, 12 827; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0) 828; CHECK-NEXT: ret float %data 829define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 830 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 831 %elt1 = extractelement <4 x float> %data, i32 3 832 ret float %elt1 833} 834 835; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f32( 836; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 837; CHECK-NEXT: ret <2 x float> 838define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 839 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 840 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> 841 ret <2 x float> %shuf 842} 843 844; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f32( 845; CHECK-NEXT: %1 = add i32 %ofs, 4 846; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 0) 847; CHECK-NEXT: ret <2 x float> %data 848define amdgpu_ps <2 x float> 
@extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 849 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 850 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> 851 ret <2 x float> %shuf 852} 853 854; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f32( 855; CHECK-NEXT: %1 = add i32 %ofs, 8 856; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 0) 857; CHECK-NEXT: ret <2 x float> %data 858define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 859 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 860 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> 861 ret <2 x float> %shuf 862} 863 864; CHECK-LABEL: @extract_elt0_elt1_elt2_s_buffer_load_v4f32( 865; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 866; CHECK-NEXT: ret <3 x float> %data 867define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 868 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 869 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 870 ret <3 x float> %shuf 871} 872 873; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32( 874; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 875; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> 876; CHECK-NEXT: ret <3 x float> %shuf 877define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 878 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 879 %shuf = shufflevector <4 x float> %data, <4 x 
float> undef, <3 x i32> <i32 0, i32 2, i32 3> 880 ret <3 x float> %shuf 881} 882 883; CHECK-LABEL: @extract_elt0_s_buffer_load_v3f32( 884; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 885; CHECK-NEXT: ret float %data 886define amdgpu_ps float @extract_elt0_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 887 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 888 %elt0 = extractelement <3 x float> %data, i32 0 889 ret float %elt0 890} 891 892; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f32( 893; CHECK-NEXT: %1 = add i32 %ofs, 4 894; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0) 895; CHECK-NEXT: ret float %data 896define amdgpu_ps float @extract_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 897 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 898 %elt1 = extractelement <3 x float> %data, i32 1 899 ret float %elt1 900} 901 902; CHECK-LABEL: @extract_elt2_s_buffer_load_v3f32( 903; CHECK-NEXT: %1 = add i32 %ofs, 8 904; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0) 905; CHECK-NEXT: ret float %data 906define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 907 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 908 %elt1 = extractelement <3 x float> %data, i32 2 909 ret float %elt1 910} 911 912; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v3f32( 913; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 914; CHECK-NEXT: ret <2 x float> 915define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 916 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 917 %shuf = shufflevector <3 x float> %data, <3 x 
float> undef, <2 x i32> <i32 0, i32 1> 918 ret <2 x float> %shuf 919} 920 921; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v3f32( 922; CHECK-NEXT: %1 = add i32 %ofs, 4 923; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 0) 924; CHECK-NEXT: ret <2 x float> %data 925define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 926 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 927 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 928 ret <2 x float> %shuf 929} 930 931; Do not trim to vec3 s_buffer_load in instcombine, as the load will most likely be widened 932; to vec4 anyway during lowering. 933; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32( 934; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 935; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 936; CHECK-NEXT: ret <3 x float> %shuf 937define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 938 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 939 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 940 ret <3 x float> %shuf 941} 942 943; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4f32( 944; CHECK-NEXT: %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 945; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 946; CHECK-NEXT: ret i32 %tmp2 947define i32 @extract0_bitcast_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 948 %tmp = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) 949 %tmp1 = bitcast <4 x float> %tmp to <4 x i32> 950 %tmp2 = extractelement <4 x i32> %tmp1, i32 0 951 ret i32 %tmp2 952} 953 954; 
CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4i32( 955; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 0) 956; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float 957; CHECK-NEXT: ret float %tmp2 958define float @extract0_bitcast_s_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 959 %tmp = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 0) 960 %tmp1 = bitcast <4 x i32> %tmp to <4 x float> 961 %tmp2 = extractelement <4 x float> %tmp1, i32 0 962 ret float %tmp2 963} 964 965; CHECK-LABEL: @preserve_metadata_extract_elt0_s_buffer_load_v2f32( 966; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0 967; CHECK-NEXT: ret float %data 968define amdgpu_ps float @preserve_metadata_extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 969 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0 970 %elt0 = extractelement <2 x float> %data, i32 0 971 ret float %elt0 972} 973 974declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1 975declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32) #1 976declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32) #1 977declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32) #1 978declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) #1 979 980; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f16( 981; CHECK: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 982; CHECK-NEXT: ret half %data 983define amdgpu_ps half @extract_elt0_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 984 %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 985 %elt0 = extractelement <2 x half> %data, i32 0 986 ret half %elt0 987} 988 989; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f16( 990; CHECK-NEXT: %1 = add i32 
%ofs, 2 991; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0) 992; CHECK-NEXT: ret half %data 993define amdgpu_ps half @extract_elt1_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 994 %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 995 %elt1 = extractelement <2 x half> %data, i32 1 996 ret half %elt1 997} 998 999; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f16( 1000; CHECK-NEXT: %1 = add i32 %ofs, 2 1001; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0) 1002; CHECK-NEXT: ret half %data 1003define amdgpu_ps half @extract_elt1_s_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1004 %data = call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 1005 %elt1 = extractelement <3 x half> %data, i32 1 1006 ret half %elt1 1007} 1008 1009; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f16( 1010; CHECK-NEXT: %1 = add i32 %ofs, 2 1011; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0) 1012; CHECK-NEXT: ret half %data 1013define amdgpu_ps half @extract_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1014 %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 1015 %elt1 = extractelement <4 x half> %data, i32 1 1016 ret half %elt1 1017} 1018 1019 1020; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f16( 1021; CHECK-NEXT: %1 = add i32 %ofs, 6 1022; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0) 1023; CHECK-NEXT: ret half %data 1024define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1025 %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 1026 %elt1 = extractelement <4 x half> %data, i32 3 1027 ret half %elt1 1028} 1029 1030; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f16( 1031; CHECK-NEXT: 
%data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 1032; CHECK-NEXT: ret <2 x half> 1033define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1034 %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) 1035 %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1> 1036 ret <2 x half> %shuf 1037} 1038 1039declare half @llvm.amdgcn.s.buffer.load.f16(<4 x i32>, i32, i32) #1 1040declare <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32>, i32, i32) #1 1041declare <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32>, i32, i32) #1 1042declare <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32>, i32, i32) #1 1043 1044; CHECK-LABEL: @extract_elt0_s_buffer_load_v2i8( 1045; CHECK: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1046; CHECK-NEXT: ret i8 %data 1047define amdgpu_ps i8 @extract_elt0_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1048 %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1049 %elt0 = extractelement <2 x i8> %data, i32 0 1050 ret i8 %elt0 1051} 1052 1053; CHECK-LABEL: @extract_elt1_s_buffer_load_v2i8( 1054; CHECK-NEXT: %1 = add i32 %ofs, 1 1055; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0) 1056; CHECK-NEXT: ret i8 %data 1057define amdgpu_ps i8 @extract_elt1_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1058 %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1059 %elt1 = extractelement <2 x i8> %data, i32 1 1060 ret i8 %elt1 1061} 1062 1063; CHECK-LABEL: @extract_elt1_s_buffer_load_v3i8( 1064; CHECK-NEXT: %1 = add i32 %ofs, 1 1065; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0) 1066; CHECK-NEXT: ret i8 %data 1067define amdgpu_ps i8 @extract_elt1_s_buffer_load_v3i8(<4 x i32> inreg %rsrc, 
i32 %ofs) #0 { 1068 %data = call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1069 %elt1 = extractelement <3 x i8> %data, i32 1 1070 ret i8 %elt1 1071} 1072 1073; CHECK-LABEL: @extract_elt1_s_buffer_load_v4i8( 1074; CHECK-NEXT: %1 = add i32 %ofs, 1 1075; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0) 1076; CHECK-NEXT: ret i8 %data 1077define amdgpu_ps i8 @extract_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1078 %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1079 %elt1 = extractelement <4 x i8> %data, i32 1 1080 ret i8 %elt1 1081} 1082 1083; CHECK-LABEL: @extract_elt3_s_buffer_load_v4i8( 1084; CHECK-NEXT: %1 = add i32 %ofs, 3 1085; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0) 1086; CHECK-NEXT: ret i8 %data 1087define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1088 %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1089 %elt1 = extractelement <4 x i8> %data, i32 3 1090 ret i8 %elt1 1091} 1092 1093; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4i8( 1094; CHECK-NEXT: %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1095; CHECK-NEXT: ret <2 x i8> 1096define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { 1097 %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) 1098 %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1> 1099 ret <2 x i8> %shuf 1100} 1101 1102declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32) #1 1103declare <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32>, i32, i32) #1 1104declare <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32>, i32, i32) #1 1105declare <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32>, i32, i32) #1 1106 1107; 
; --------------------------------------------------------------------
; llvm.amdgcn.raw.buffer.load.format
; --------------------------------------------------------------------
;
; Expected results in this section: a format load is shrunk to the smallest
; result that still contains the highest element that is used. The offset
; operands are never changed, so unused leading elements are kept and the
; extractelement/shufflevector on the narrowed value remains.

; The whole result is returned: nothing to narrow.

; CHECK-LABEL: @raw_buffer_load_format_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret float %data
}

; CHECK-LABEL: @raw_buffer_load_format_v1f32(
; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <1 x float> %data
define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret <1 x float> %data
}

; CHECK-LABEL: @raw_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret <2 x float> %data
}

; CHECK-LABEL: @raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <4 x float> %data
define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ret <4 x float> %data
}

; A single element is extracted from a <2 x float> or <4 x float> load.

; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 3
  ret float %elt1
}

; A sub-vector is selected from a <4 x float> load with shufflevector.

; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ret <3 x float> %shuf
}

; The same cases starting from a <3 x float> load.

; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <3 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; Element 0 is reached through a bitcast of the loaded vector; the expected
; output loads a scalar float and bitcasts that instead.

; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
; CHECK-NEXT: ret i32 %tmp2
define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
  ret i32 %tmp2
}

; NOTE(review): despite its name this test contains no bitcast and no i32
; load; it extracts element 0 of a <4 x float> load directly. Only float
; overloads of the format intrinsic are declared in this section, which is
; presumably why there is no integer variant here - confirm before changing.

; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %tmp
define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = extractelement <4 x float> %tmp, i32 0
  ret float %tmp1
}

; Metadata attached to the original call (!fpmath here) is expected on the
; narrowed call as well.

; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
; CHECK-NEXT: ret float %data
define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1
declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1
declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1
declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.struct.buffer.load
; --------------------------------------------------------------------
;
; Expected results in this section: unused trailing elements are dropped and
; unused leading elements are skipped by adding their size in bytes to %ofs
; (4 per float). A use set with a gap (elements 0, 2, 3) is left unchanged.

; The whole result is returned: nothing to narrow.

; CHECK-LABEL: @struct_buffer_load_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret float %data
}

; CHECK-LABEL: @struct_buffer_load_v1f32(
; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <1 x float> %data
define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret <1 x float> %data
}

; CHECK-LABEL: @struct_buffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret <2 x float> %data
}

; CHECK-LABEL: @struct_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <4 x float> %data
define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret <4 x float> %data
}

; A single element is extracted from a <2 x float> or <4 x float> load.

; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 8
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 12
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 3
  ret float %elt1
}

; A sub-vector is selected from a <4 x float> load with shufflevector.

; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 8
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ret <3 x float> %shuf
}

; The same cases starting from a <3 x float> load.

; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <3 x float> %data, i32 0
  ret float %elt0
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 1
  ret float %elt1
}
; llvm.amdgcn.struct.buffer.load on a <3 x float> result: a skipped leading
; element is expected to turn into an add on %ofs (4 per float) and unused
; trailing elements are expected to be dropped.

; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
; CHECK-NEXT: %1 = add i32 %ofs, 8
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 2
  ret float %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
; CHECK-NEXT: %1 = add i32 %ofs, 4
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; Element 0 is reached through a bitcast of the loaded vector. The expected
; output loads a scalar of the original element type and bitcasts the scalar.

; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
; CHECK-NEXT: ret i32 %tmp2
define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
  ret i32 %tmp2
}

; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
; CHECK-NEXT: ret float %tmp2
define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
  %tmp2 = extractelement <4 x float> %tmp1, i32 0
  ret float %tmp2
}

; Metadata attached to the original call (!fpmath here) is expected on the
; narrowed call as well.

; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
; CHECK-NEXT: ret float %data
define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1

declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1

; llvm.amdgcn.struct.buffer.load with half elements: the expected offset
; adjustment is 2 per skipped leading element.

; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f16(
; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt0_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <2 x half> %data, i32 0
  ret half %elt0
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f16(
; CHECK-NEXT: %1 = add i32 %ofs, 2
; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt1_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <2 x half> %data, i32 1
  ret half %elt1
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f16(
; CHECK-NEXT: %1 = add i32 %ofs, 2
; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt1_struct_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x half> %data, i32 1
  ret half %elt1
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f16(
; CHECK-NEXT: %1 = add i32 %ofs, 2
; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 1
  ret half %elt1
}

; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f16(
; CHECK-NEXT: %1 = add i32 %ofs, 6
; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 3
  ret half %elt1
}

; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f16(
; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x half> %data
define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x half> %shuf
}

declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1

; llvm.amdgcn.struct.buffer.load with i8 elements: the expected offset
; adjustment is 1 per skipped leading element.

; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2i8(
; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret i8 %data
define amdgpu_ps i8 @extract_elt0_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <2 x i8> %data, i32 0
  ret i8 %elt0
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2i8(
; CHECK-NEXT: %1 = add i32 %ofs, 1
; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret i8 %data
define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <2 x i8> %data, i32 1
  ret i8 %elt1
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3i8(
; CHECK-NEXT: %1 = add i32 %ofs, 1
; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
; CHECK-NEXT: ret i8 %data
define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x i8> %data, i32 1
  ret i8 %elt1
}

; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4i8(
1674; CHECK-NEXT: %1 = add i32 %ofs, 1 1675; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0) 1676; CHECK-NEXT: ret i8 %data 1677define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { 1678 %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) 1679 %elt1 = extractelement <4 x i8> %data, i32 1 1680 ret i8 %elt1 1681} 1682 1683; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4i8( 1684; CHECK-NEXT: %1 = add i32 %ofs, 3 1685; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0) 1686; CHECK-NEXT: ret i8 %data 1687define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { 1688 %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) 1689 %elt1 = extractelement <4 x i8> %data, i32 3 1690 ret i8 %elt1 1691} 1692 1693; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4i8( 1694; CHECK-NEXT: %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) 1695; CHECK-NEXT: ret <2 x i8> 1696define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { 1697 %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) 1698 %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1> 1699 ret <2 x i8> %shuf 1700} 1701 1702declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #1 1703declare <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32>, i32, i32, i32, i32) #1 1704declare <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32>, i32, i32, i32, i32) #1 1705declare <4 x i8> 
@llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32>, i32, i32, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.struct.buffer.load.format
; --------------------------------------------------------------------

; Baseline: the whole scalar result is used, so the call is expected unchanged.
; CHECK-LABEL: @struct_buffer_load_format_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret float %data
}

; Baseline: the whole <1 x float> result is returned; expected unchanged.
; CHECK-LABEL: @struct_buffer_load_format_v1f32(
; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <1 x float> %data
define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret <1 x float> %data
}

; Baseline: the whole <2 x float> result is returned; expected unchanged.
; CHECK-LABEL: @struct_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret <2 x float> %data
}

; Baseline: the whole <4 x float> result is returned; expected unchanged.
; CHECK-LABEL: @struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <4 x float> %data
define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ret <4 x float> %data
}

; Only element 0 of the <2 x float> load is used: the expected output is a
; scalar float load, immediately followed by the return.
; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; The last element of the <2 x float> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

; Only element 0 of the <4 x float> load is used: expect a scalar float load.
; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float>
@llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; Element 1 of the <4 x float> load is used: the expected output loads only
; <2 x float> (the unused trailing elements are dropped) and keeps the extract.
; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

; Element 2 of the <4 x float> load is used: the expected output loads only
; <3 x float> and keeps the extract of element 2.
; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 2
  ret float %elt1
}

; The last element of the <4 x float> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float>
@llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 3
  ret float %elt1
}

; Elements 0 and 1 of the <4 x float> load are used: the expected output is a
; <2 x float> load whose result is returned directly (no shuffle left).
; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 1 and 2 are used: the expected output loads <3 x float> and keeps a
; shuffle selecting elements 1 and 2.
; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; Elements 2 and 3 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float>
@extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %shuf
}

; Elements 0..2 of the <4 x float> load are used: the expected output is a
; <3 x float> load whose result is returned directly.
; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; Elements 1..3 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

; Elements 0, 2 and 3 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ret <3 x float> %shuf
}

; Only element 0 of the <3 x float> load is used: expect a scalar float load.
; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt0 = extractelement <3 x float> %data, i32 0
  ret float %elt0
}

; Element 1 of the <3 x float> load is used: the expected output loads only
; <2 x float> and keeps the extract.
; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 1
  ret float %elt1
}

; The last element of the <3 x float> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 2
  ret float %elt1
}

; Elements 0 and 1 of the <3 x float> load are used: the expected output is a
; <2 x float> load whose result is returned directly.
; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 1 and 2 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; Element 0 is extracted through a bitcast to <4 x i32>: the expected output is
; a scalar float load followed by a scalar bitcast to i32.
; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
; CHECK-NEXT: ret i32 %tmp2
define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
  ret i32 %tmp2
}

; The !fpmath metadata attached to the vector load is expected to be carried
; over to the narrowed scalar load.
; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
; CHECK-NEXT: ret float %data
define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; Overloads of llvm.amdgcn.struct.buffer.load.format declared for this section.
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1
declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1
declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1
declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1

declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.raw.tbuffer.load
; --------------------------------------------------------------------

; Baseline: the whole scalar result is used, so the call is expected unchanged.
; CHECK-LABEL: @raw_tbuffer_load_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @raw_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  ret float %data
}

; Baseline: the whole <2 x float> result is returned; expected unchanged.
; CHECK-LABEL: @raw_tbuffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  ret <2 x float> %data
}

; Baseline: the whole <4 x float> result is returned; expected unchanged.
; CHECK-LABEL: @raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret <4 x float> %data
define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  ret <4 x float> %data
}

; Only element 0 of the <2 x float> load is used: the expected output is a
; scalar float load, immediately followed by the return.
; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt0 =
extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; The last element of the <2 x float> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

; Only element 0 of the <4 x float> load is used: expect a scalar float load.
; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; Element 1 of the <4 x float> load is used: the expected output loads only
; <2 x float> and keeps the extract.
; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

; Element 2 of the <4 x float> load is used: the expected output loads only
; <3 x float> and keeps the extract of element 2.
; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 2
  ret float %elt1
}

; The last element of the <4 x float> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 3
  ret float %elt1
}

; Elements 0 and 1 of the <4 x float> load are used: the expected output is a
; <2 x float> load whose result is returned directly.
; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 1 and 2 are used: the expected output loads <3 x float> and keeps a
; shuffle selecting elements 1 and 2.
; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; Elements 2 and 3 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %shuf
}

; Elements 0..2 of the <4 x float> load are used: the expected output is a
; <3 x float> load whose result is returned directly.
; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; Elements 1..3 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

; Elements 0, 2 and 3 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ret <3 x float> %shuf
}

; Only element 0 of the <3 x float> load is used: expect a scalar float load.
; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v3f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt0 = extractelement <3 x float> %data, i32 0
  ret float %elt0
}

; Element 1 of the <3 x float> load is used: the expected output loads only
; <2 x float> and keeps the extract.
; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 1
  ret float %elt1
}

; The last element of the <3 x float> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
; CHECK-NEXT: ret float %elt1
define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <3 x float> %data, i32 2
  ret float %elt1
}

; Elements 0 and 1 of the <3 x float> load are used: the expected output is a
; <2 x float> load whose result is returned directly.
; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v3f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 1 and 2 are used (including the last one): expected unchanged.
; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> %shuf
define amdgpu_ps <2 x float>
@extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

; Element 0 is extracted through a bitcast to <4 x i32>: the expected output is
; a scalar float load followed by a scalar bitcast to i32.
; CHECK-LABEL: @extract0_bitcast_raw_tbuffer_load_v4f32(
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
; CHECK-NEXT: ret i32 %tmp2
define i32 @extract0_bitcast_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %tmp = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
  ret i32 %tmp2
}

; The !fpmath metadata attached to the vector load is expected to be carried
; over to the narrowed scalar load.
; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
; CHECK-NEXT: ret float %data
define amdgpu_ps float @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
  %elt0 = extractelement <2 x float> %data, i32 0
  ret float %elt0
}

; Float overloads of llvm.amdgcn.raw.tbuffer.load used by the tests above.
declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1

declare <4 x i32>
@llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1

; The last element of the <4 x half> load is used: expected unchanged.
; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
; CHECK-NEXT: ret half %elt1
define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 3
  ret half %elt1
}

; Element 2 of the <4 x half> load is used: the expected output loads only
; <3 x half> and keeps the extract of element 2.
; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <3 x half> %data, i32 2
; CHECK-NEXT: ret half %elt1
define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 2
  ret half %elt1
}

; Element 1 of the <4 x half> load is used: the expected output loads only
; <2 x half> and keeps the extract.
; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
; CHECK-NEXT: ret half %elt1
define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 1
  ret half %elt1
}

; Only element 0 of the <4 x half> load is used: expect a scalar half load.
; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 0
  ret half %elt1
}

; Half overloads of llvm.amdgcn.raw.tbuffer.load used by the tests above.
declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.struct.tbuffer.load
; --------------------------------------------------------------------

; Baseline: the whole scalar result is used, so the call is expected unchanged.
; CHECK-LABEL: @struct_tbuffer_load_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @struct_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
  %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
  ret float %data
}

; Baseline: the whole <2 x float> result is returned; expected unchanged.
; CHECK-LABEL: @struct_tbuffer_load_v2f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
  %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
ret <2 x float> %data 2216} 2217 2218; CHECK-LABEL: @struct_tbuffer_load_v4f32( 2219; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2220; CHECK-NEXT: ret <4 x float> %data 2221define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2222 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2223 ret <4 x float> %data 2224} 2225 2226; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v2f32( 2227; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2228; CHECK-NEXT: ret float %data 2229define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2230 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2231 %elt0 = extractelement <2 x float> %data, i32 0 2232 ret float %elt0 2233} 2234 2235; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v2f32( 2236; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2237; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 2238; CHECK-NEXT: ret float %elt1 2239define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2240 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2241 %elt1 = extractelement <2 x float> %data, i32 1 2242 ret float %elt1 2243} 2244 2245; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v4f32( 2246; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, 
i32 0) 2247; CHECK-NEXT: ret float %data 2248define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2249 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2250 %elt0 = extractelement <4 x float> %data, i32 0 2251 ret float %elt0 2252} 2253 2254; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v4f32( 2255; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2256; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 2257; CHECK-NEXT: ret float %elt1 2258define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2259 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2260 %elt1 = extractelement <4 x float> %data, i32 1 2261 ret float %elt1 2262} 2263 2264; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v4f32( 2265; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2266; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 2267; CHECK-NEXT: ret float %elt1 2268define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2269 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2270 %elt1 = extractelement <4 x float> %data, i32 2 2271 ret float %elt1 2272} 2273 2274; CHECK-LABEL: @extract_elt3_struct_tbuffer_load_v4f32( 2275; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2276; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 2277; 
CHECK-NEXT: ret float %elt1 2278define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2279 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2280 %elt1 = extractelement <4 x float> %data, i32 3 2281 ret float %elt1 2282} 2283 2284; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v4f32( 2285; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2286; CHECK-NEXT: ret <2 x float> %data 2287define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2288 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2289 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> 2290 ret <2 x float> %shuf 2291} 2292 2293; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32( 2294; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2295; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 2296; CHECK-NEXT: ret <2 x float> %shuf 2297define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2298 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2299 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> 2300 ret <2 x float> %shuf 2301} 2302 2303; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32( 2304; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 
78, i32 0) 2305; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2306; CHECK-NEXT: ret <2 x float> %shuf 2307define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2308 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2309 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2310 ret <2 x float> %shuf 2311} 2312 2313; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32( 2314; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2315; CHECK-NEXT: ret <3 x float> %data 2316define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2317 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2318 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 2319 ret <3 x float> %shuf 2320} 2321 2322; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32( 2323; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2324; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 2325; CHECK-NEXT: ret <3 x float> %shuf 2326define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2327 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2328 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 2329 ret <3 x 
float> %shuf 2330} 2331 2332; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32( 2333; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2334; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> 2335; CHECK-NEXT: ret <3 x float> %shuf 2336define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2337 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2338 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> 2339 ret <3 x float> %shuf 2340} 2341 2342; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v3f32( 2343; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2344; CHECK-NEXT: ret float %data 2345define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2346 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2347 %elt0 = extractelement <3 x float> %data, i32 0 2348 ret float %elt0 2349} 2350 2351; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v3f32( 2352; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2353; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 2354; CHECK-NEXT: ret float %elt1 2355define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2356 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2357 %elt1 = extractelement 
<3 x float> %data, i32 1 2358 ret float %elt1 2359} 2360 2361; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v3f32( 2362; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2363; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 2364; CHECK-NEXT: ret float %elt1 2365define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2366 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2367 %elt1 = extractelement <3 x float> %data, i32 2 2368 ret float %elt1 2369} 2370 2371; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v3f32( 2372; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2373; CHECK-NEXT: ret <2 x float> %data 2374define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2375 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2376 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1> 2377 ret <2 x float> %shuf 2378} 2379 2380; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32( 2381; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2382; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 2383; CHECK-NEXT: ret <2 x float> %shuf 2384define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2385 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, 
i32 0) 2386 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 2387 ret <2 x float> %shuf 2388} 2389 2390; CHECK-LABEL: @extract0_bitcast_struct_tbuffer_load_v4f32( 2391; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2392; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 2393; CHECK-NEXT: ret i32 %tmp2 2394define i32 @extract0_bitcast_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2395 %tmp = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) 2396 %tmp1 = bitcast <4 x float> %tmp to <4 x i32> 2397 %tmp2 = extractelement <4 x i32> %tmp1, i32 0 2398 ret i32 %tmp2 2399} 2400 2401; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32( 2402; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0 2403; CHECK-NEXT: ret float %data 2404define amdgpu_ps float @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { 2405 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0 2406 %elt0 = extractelement <2 x float> %data, i32 0 2407 ret float %elt0 2408} 2409 2410declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32) #1 2411declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32) #1 2412declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32) #1 2413declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32) #1 2414 2415declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32) #1 2416 2417; 
-------------------------------------------------------------------- 2418; llvm.amdgcn.tbuffer.load 2419; -------------------------------------------------------------------- 2420 2421; CHECK-LABEL: @tbuffer_load_f32( 2422; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2423; CHECK-NEXT: ret float %data 2424define amdgpu_ps float @tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2425 %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2426 ret float %data 2427} 2428 2429; CHECK-LABEL: @tbuffer_load_v2f32( 2430; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2431; CHECK-NEXT: ret <2 x float> %data 2432define amdgpu_ps <2 x float> @tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2433 %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2434 ret <2 x float> %data 2435} 2436 2437; CHECK-LABEL: @tbuffer_load_v4f32( 2438; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2439; CHECK-NEXT: ret <4 x float> %data 2440define amdgpu_ps <4 x float> @tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2441 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2442 ret <4 x float> %data 2443} 2444 2445; CHECK-LABEL: @extract_elt0_tbuffer_load_v2f32( 2446; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2447; CHECK-NEXT: ret float %data 2448define 
amdgpu_ps float @extract_elt0_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2449 %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2450 %elt0 = extractelement <2 x float> %data, i32 0 2451 ret float %elt0 2452} 2453 2454; CHECK-LABEL: @extract_elt1_tbuffer_load_v2f32( 2455; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2456; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 2457; CHECK-NEXT: ret float %elt1 2458define amdgpu_ps float @extract_elt1_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2459 %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2460 %elt1 = extractelement <2 x float> %data, i32 1 2461 ret float %elt1 2462} 2463 2464; CHECK-LABEL: @extract_elt0_tbuffer_load_v4f32( 2465; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2466; CHECK-NEXT: ret float %data 2467define amdgpu_ps float @extract_elt0_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2468 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2469 %elt0 = extractelement <4 x float> %data, i32 0 2470 ret float %elt0 2471} 2472 2473; CHECK-LABEL: @extract_elt1_tbuffer_load_v4f32( 2474; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2475; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 2476; CHECK-NEXT: ret float %elt1 2477define amdgpu_ps float @extract_elt1_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 
%arg1) #0 { 2478 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2479 %elt1 = extractelement <4 x float> %data, i32 1 2480 ret float %elt1 2481} 2482 2483; CHECK-LABEL: @extract_elt2_tbuffer_load_v4f32( 2484; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2485; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 2486; CHECK-NEXT: ret float %elt1 2487define amdgpu_ps float @extract_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2488 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2489 %elt1 = extractelement <4 x float> %data, i32 2 2490 ret float %elt1 2491} 2492 2493; CHECK-LABEL: @extract_elt3_tbuffer_load_v4f32( 2494; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2495; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 2496; CHECK-NEXT: ret float %elt1 2497define amdgpu_ps float @extract_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2498 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2499 %elt1 = extractelement <4 x float> %data, i32 3 2500 ret float %elt1 2501} 2502 2503; CHECK-LABEL: @extract_elt0_elt1_tbuffer_load_v4f32( 2504; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2505; CHECK-NEXT: ret <2 x float> %data 2506define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2507 %data = call <4 x float> 
@llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2508 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1> 2509 ret <2 x float> %shuf 2510} 2511 2512; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v4f32( 2513; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2514; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 2515; CHECK-NEXT: ret <2 x float> %shuf 2516define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2517 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2518 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2> 2519 ret <2 x float> %shuf 2520} 2521 2522; CHECK-LABEL: @extract_elt2_elt3_tbuffer_load_v4f32( 2523; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2524; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2525; CHECK-NEXT: ret <2 x float> %shuf 2526define amdgpu_ps <2 x float> @extract_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2527 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2528 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2529 ret <2 x float> %shuf 2530} 2531 2532; CHECK-LABEL: @extract_elt0_elt1_elt2_tbuffer_load_v4f32( 2533; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 
2534; CHECK-NEXT: ret <3 x float> %data 2535define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2536 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2537 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 2538 ret <3 x float> %shuf 2539} 2540 2541; CHECK-LABEL: @extract_elt1_elt2_elt3_tbuffer_load_v4f32( 2542; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2543; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 2544; CHECK-NEXT: ret <3 x float> %shuf 2545define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2546 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2547 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 2548 ret <3 x float> %shuf 2549} 2550 2551; CHECK-LABEL: @extract_elt0_elt2_elt3_tbuffer_load_v4f32( 2552; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2553; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> 2554; CHECK-NEXT: ret <3 x float> %shuf 2555define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2556 %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2557 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3> 2558 ret <3 
x float> %shuf 2559} 2560 2561; CHECK-LABEL: @extract_elt0_tbuffer_load_v3f32( 2562; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2563; CHECK-NEXT: ret float %data 2564define amdgpu_ps float @extract_elt0_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2565 %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2566 %elt0 = extractelement <3 x float> %data, i32 0 2567 ret float %elt0 2568} 2569 2570; CHECK-LABEL: @extract_elt1_tbuffer_load_v3f32( 2571; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2572; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 2573; CHECK-NEXT: ret float %elt1 2574define amdgpu_ps float @extract_elt1_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2575 %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2576 %elt1 = extractelement <3 x float> %data, i32 1 2577 ret float %elt1 2578} 2579 2580; CHECK-LABEL: @extract_elt2_tbuffer_load_v3f32( 2581; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2582; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 2583; CHECK-NEXT: ret float %elt1 2584define amdgpu_ps float @extract_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2585 %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2586 %elt1 = extractelement <3 x float> %data, i32 2 2587 ret float %elt1 2588} 2589 2590; CHECK-LABEL: @extract_elt0_elt1_tbuffer_load_v3f32( 2591; 
CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2592; CHECK-NEXT: ret <2 x float> %data 2593define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2594 %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2595 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1> 2596 ret <2 x float> %shuf 2597} 2598 2599; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v3f32( 2600; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2601; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 2602; CHECK-NEXT: ret <2 x float> %shuf 2603define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2604 %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2605 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2> 2606 ret <2 x float> %shuf 2607} 2608 2609; CHECK-LABEL: @extract0_bitcast_tbuffer_load_v4f32( 2610; CHECK-NEXT: %tmp = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2611; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 2612; CHECK-NEXT: ret i32 %tmp2 2613define i32 @extract0_bitcast_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2614 %tmp = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) 2615 %tmp1 = bitcast <4 x float> %tmp to <4 x i32> 2616 %tmp2 = extractelement <4 x i32> %tmp1, i32 
0 2617 ret i32 %tmp2 2618} 2619 2620; CHECK-LABEL: @preserve_metadata_extract_elt0_tbuffer_load_v2f32( 2621; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false), !fpmath !0 2622; CHECK-NEXT: ret float %data 2623define amdgpu_ps float @preserve_metadata_extract_elt0_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { 2624 %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false), !fpmath !0 2625 %elt0 = extractelement <2 x float> %data, i32 0 2626 ret float %elt0 2627} 2628 2629declare float @llvm.amdgcn.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1 2630declare <1 x float> @llvm.amdgcn.tbuffer.load.v1f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1 2631declare <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1 2632declare <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1 2633declare <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1 2634 2635declare <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1 2636 2637; -------------------------------------------------------------------- 2638; llvm.amdgcn.image.sample 2639; -------------------------------------------------------------------- 2640 2641; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32( 2642; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 2643; CHECK-NEXT: ret float %data 2644define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 2645 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> 
%sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 2646 %elt0 = extractelement <4 x float> %data, i32 0 2647 ret float %elt0 2648} 2649 2650; Check that the intrinsic remains unchanged in the presence of TFE or LWE 2651; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe( 2652; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) 2653; CHECK: ret float %elt0 2654define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 2655 %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) 2656 %data.vec = extractvalue {<4 x float>,i32} %data, 0 2657 %elt0 = extractelement <4 x float> %data.vec, i32 0 2658 ret float %elt0 2659} 2660 2661; Check that the intrinsic remains unchanged in the presence of TFE or LWE 2662; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe( 2663; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) 2664; CHECK: ret float %elt0 2665define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 2666 %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) 2667 %data.vec = extractvalue {<4 x float>,i32} %data, 0 2668 %elt0 = extractelement <4 x float> %data.vec, i32 0 2669 ret float %elt0 2670} 2671 2672; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32( 2673; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 2674; CHECK-NEXT: ret float %data 
define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; First operand is 0 (the test name calls it dmask 0000): the sample is
; expected to disappear and the function to return undef.
; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
; CHECK-NEXT: ret float undef
define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; First operand is already 1: it is expected to stay 1 while the result
; type shrinks to a scalar.
; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; A single bit (2) in the first operand: the bit is expected to be kept
; and the result narrowed to a scalar.
; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; Same as above with the single bit 4.
; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; Same as above with the single bit 8.
; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; First operand 9 with only element 0 used: expected to be reduced to 1.
; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; First operand 3 with only element 0 used: expected to be reduced to 1.
; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; First operand 7 with only element 0 used: expected to be reduced to 1.
; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

; Elements 0 and 1 are requested but the first operand is 1: expect a
; scalar sample inserted into lane 0 of an otherwise undef <2 x float>.
; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %1 = insertelement <2 x float> undef, float %data, i32 0
; CHECK-NEXT: ret <2 x float> %1
define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 0 and 1 with first operand 3: expect a <2 x float> sample with
; the operand unchanged.
; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 0 and 1 with first operand 7: expect the operand reduced to 3.
; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 0 and 1 with first operand 5: expect a <2 x float> sample with
; the operand kept at 5.
; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %shuf
}

; Elements 0-2 are requested but the first operand is 1: expect a scalar
; sample inserted into lane 0 of an otherwise undef <3 x float>.
; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %1 = insertelement <3 x float> undef, float %data, i32 0
; CHECK-NEXT: ret <3 x float> %1
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; Elements 0-2 with first operand 3: expect a <2 x float> sample widened
; back to <3 x float> with an undef third lane.
; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <2
x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x float> %shuf
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; Elements 0-2 with first operand 7: expect a <3 x float> sample with the
; operand unchanged and no shuffle left.
; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; Elements 0-2 with first operand 15: expect the operand reduced to 7.
; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %shuf
}

; Declarations of the image.sample overloads called by the tests above.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.cl
; --------------------------------------------------------------------

; Only element 1 of a first-operand-15 sample is used: expect a scalar
; sample whose first operand is 2.
; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.d
;
--------------------------------------------------------------------

; Only element 2 of a first-operand-15 sample is used: expect a scalar
; sample whose first operand is 4.
; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt2 = extractelement <4 x float> %data, i32 2
  ret float %elt2
}

declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.d.cl
; --------------------------------------------------------------------

; Only element 3 of a first-operand-15 sample is used: expect a scalar
; sample whose first operand is 8.
; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt3 = extractelement <4 x float> %data, i32 3
  ret float %elt3
}

declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.l
; --------------------------------------------------------------------

; Element 1 of a <2 x float> sample with first operand 6: expect a scalar
; sample whose first operand is 4.
; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt1 = extractelement <2 x float> %data, i32 1
  ret float %elt1
}

declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.b
; --------------------------------------------------------------------

; Element 1 with first operand 9: expect a scalar sample whose first
; operand is 8.
; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt1 = extractelement <4 x float> %data, i32 1
  ret float %elt1
}

declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.b.cl
; --------------------------------------------------------------------

; Elements 1 and 2 with first operand 13: expect a <2 x float> sample
; whose first operand is 12.
; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %shuf
}

declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.lz
; --------------------------------------------------------------------

; Elements 1 and 3 with first operand 15: expect a <2 x float> sample
; whose first operand is 10.
; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <2 x float> %data
define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %shuf
}

declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.cd
; --------------------------------------------------------------------

; Elements 1-3 with first operand 15: expect a <3 x float> sample whose
; first operand is 14.
; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32(i32 14, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret <3 x float> %data
define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %shuf
}

declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.cd.cl
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
;
CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt3 = extractelement <4 x half> %data, i32 3
  ret half %elt3
}

; Only element 2 of the <4 x half> sample is used: expect a scalar half
; sample whose first operand is 4.
; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 4, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt2 = extractelement <4 x half> %data, i32 2
  ret half %elt2
}

; Only element 1 is used: expect a scalar half sample whose first
; operand is 2.
; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 2, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt1 = extractelement <4 x half> %data, i32 1
  ret half %elt1
}

; The shuffle keeps lanes 0-2 and takes its last lane from the undef
; operand: expect a <3 x half> sample (first operand 7) widened back to
; <4 x half> with an undef last lane.
; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x half> %res
define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x half> %res
}

; The shuffle keeps lanes 0-1 and takes the other two from the undef
; operand: expect a <2 x half> sample (first operand 3) widened back to
; <4 x half>.
; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %res = shufflevector <2 x half> %data, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x half> %res
define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x half> %res
}

; CHECK-LABEL:
@extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: %res = insertelement <4 x half> undef, half %data, i64 0
; CHECK-NEXT: ret <4 x half> %res
define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  ret <4 x half> %res
}

; Only element 0 of the <4 x half> sample is used: expect a scalar half
; sample whose first operand is 1.
; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret half %data
define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x half> %data, i32 0
  ret half %elt0
}

declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c
; --------------------------------------------------------------------

;
CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.cl
; --------------------------------------------------------------------

; Only element 0 of a first-operand-15 sample is used: expect a scalar
; sample whose first operand is 1.
; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.d
;
--------------------------------------------------------------------

; Only element 0 of a first-operand-15 sample is used: expect a scalar
; sample whose first operand is 1.
; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.d.cl
; --------------------------------------------------------------------

; Same element-0 narrowing for the c.d.cl variant.
; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.l
; --------------------------------------------------------------------

; Same element-0 narrowing for the c.l variant.
; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.b
; --------------------------------------------------------------------

; Same element-0 narrowing for the c.b variant.
; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.b.cl
; --------------------------------------------------------------------

; Same element-0 narrowing for the c.b.cl variant.
; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.lz
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4
x i32> inreg %rsrc) #0 { 3148 %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3149 %elt0 = extractelement <4 x float> %data, i32 0 3150 ret float %elt0 3151} 3152 3153declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3154 3155; -------------------------------------------------------------------- 3156; llvm.amdgcn.image.sample.c.cd 3157; -------------------------------------------------------------------- 3158 3159; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32( 3160; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3161; CHECK-NEXT: ret float %data 3162define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 3163 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3164 %elt0 = extractelement <4 x float> %data, i32 0 3165 ret float %elt0 3166} 3167 3168declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3169 3170; -------------------------------------------------------------------- 3171; llvm.amdgcn.image.sample.c.cd.cl 3172; -------------------------------------------------------------------- 3173 3174; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32( 3175; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, 
i32 0, i32 0) 3176; CHECK-NEXT: ret float %data 3177define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 3178 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3179 %elt0 = extractelement <4 x float> %data, i32 0 3180 ret float %elt0 3181} 3182 3183declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3184 3185; -------------------------------------------------------------------- 3186; llvm.amdgcn.image.sample.o 3187; -------------------------------------------------------------------- 3188 3189; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32( 3190; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3191; CHECK-NEXT: ret float %data 3192define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 3193 %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3194 %elt0 = extractelement <4 x float> %data, i32 0 3195 ret float %elt0 3196} 3197 3198declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3199 3200; -------------------------------------------------------------------- 3201; llvm.amdgcn.image.sample.cl.o 3202; -------------------------------------------------------------------- 3203 3204; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32( 3205; CHECK-NEXT: %data = call float 
@llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3206; CHECK-NEXT: ret float %data 3207define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 3208 %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3209 %elt0 = extractelement <4 x float> %data, i32 0 3210 ret float %elt0 3211} 3212 3213declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3214 3215; -------------------------------------------------------------------- 3216; llvm.amdgcn.image.sample.d.o 3217; -------------------------------------------------------------------- 3218 3219; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32( 3220; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3221; CHECK-NEXT: ret float %data 3222define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { 3223 %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3224 %elt0 = extractelement <4 x float> %data, i32 0 3225 ret float %elt0 3226} 3227 3228declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3229 3230; -------------------------------------------------------------------- 3231; llvm.amdgcn.image.sample.d.cl.o 3232; 
; --------------------------------------------------------------------

; Shared shape of the extract_elt0_* sample tests in this group: the input
; calls the <4 x float> overload with dmask 15 and returns only element 0.
; The expected output calls the float-returning overload of the same
; intrinsic with dmask 1 and returns that value directly.

; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.l.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.b.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.b.cl.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.lz.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.cd.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.cd.cl.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.cl.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.d.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.d.cl.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.l.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.b.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.b.cl.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.lz.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.cd.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample.c.cd.cl.o
;
; --------------------------------------------------------------------

; Sample variant: the input calls the <4 x float> overload with dmask 15 and
; returns only element 0; the expected output calls the float-returning
; overload with dmask 1 and returns that value directly.

; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4
; --------------------------------------------------------------------

; Don't handle gather4*
;
; Unlike the sample tests, the expected output for every gather4 test keeps
; the <4 x float> call and its dmask operand exactly as written in the input,
; even though only element 0 is used.
; NOTE(review): presumably because gather4's dmask selects which component is
; gathered rather than which result lanes are written - confirm against the
; AMDGPU intrinsic documentation.

; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.cl
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.l
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.b
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.b.cl
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.lz
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.cl.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.l.o
; --------------------------------------------------------------------

; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float>
@llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3607 %elt0 = extractelement <4 x float> %data, i32 0 3608 ret float %elt0 3609} 3610 3611declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3612 3613; -------------------------------------------------------------------- 3614; llvm.amdgcn.image.gather4.b.o 3615; -------------------------------------------------------------------- 3616 3617; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32( 3618; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3619define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { 3620 %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3621 %elt0 = extractelement <4 x float> %data, i32 0 3622 ret float %elt0 3623} 3624 3625declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3626 3627; -------------------------------------------------------------------- 3628; llvm.amdgcn.image.gather4.b.cl.o 3629; -------------------------------------------------------------------- 3630 3631; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32( 3632; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3633define amdgpu_ps float 
@extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { 3634 %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3635 %elt0 = extractelement <4 x float> %data, i32 0 3636 ret float %elt0 3637} 3638 3639declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3640 3641; -------------------------------------------------------------------- 3642; llvm.amdgcn.image.gather4.lz.o 3643; -------------------------------------------------------------------- 3644 3645; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32( 3646; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3647define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { 3648 %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3649 %elt0 = extractelement <4 x float> %data, i32 0 3650 ret float %elt0 3651} 3652 3653declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3654 3655; -------------------------------------------------------------------- 3656; llvm.amdgcn.image.gather4.c.o 3657; -------------------------------------------------------------------- 3658 3659; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32( 3660; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float 
%zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3661define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { 3662 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3663 %elt0 = extractelement <4 x float> %data, i32 0 3664 ret float %elt0 3665} 3666 3667declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3668 3669; -------------------------------------------------------------------- 3670; llvm.amdgcn.image.gather4.c.cl.o 3671; -------------------------------------------------------------------- 3672 3673; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32( 3674; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3675define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { 3676 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) 3677 %elt0 = extractelement <4 x float> %data, i32 0 3678 ret float %elt0 3679} 3680 3681declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 3682 3683; -------------------------------------------------------------------- 3684; llvm.amdgcn.image.gather4.c.l.o 3685; 
--------------------------------------------------------------------

; The four gather4 tests below call a <4 x float> gather4 intrinsic, extract
; element 0, and return it. The expected output repeats the input call
; verbatim, i.e. no narrowing of the call is expected.

; Offset variant with extra %zcompare and %lod operands.
; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.c.b.o
; --------------------------------------------------------------------

; Offset variant with extra %bias and %zcompare operands.
; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.c.b.cl.o
; --------------------------------------------------------------------

; Offset variant with extra %bias, %zcompare and %clamp operands.
; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.gather4.c.lz.o
; --------------------------------------------------------------------

; Offset variant with an extra %zcompare operand.
; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; The four tests below differ from the gather4 tests: the input calls a
; <4 x float> intrinsic with a leading i32 operand of 15 and uses only
; element 0, and the expected output is a scalar float call with a leading
; i32 operand of 1 whose result is returned directly (no extractelement).

; --------------------------------------------------------------------
; llvm.amdgcn.image.getlod
; --------------------------------------------------------------------

; Expected: the v4f32 getlod call becomes the f32 overload.
; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.load
; --------------------------------------------------------------------

; Expected: the v4f32 2dmsaa load becomes the f32 overload. This intrinsic
; takes a single <8 x i32> descriptor (named %sampler here) and no <4 x i32>.
; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.load.mip
; --------------------------------------------------------------------

; Expected: the v4f32 load.mip call becomes the f32 overload.
; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1

; --------------------------------------------------------------------
; llvm.amdgcn.image.getresinfo
; --------------------------------------------------------------------

; Expected: the v4f32 getresinfo call becomes the f32 overload.
; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
; CHECK-NEXT: ret float %data
define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
  %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1

;
--------------------------------------------------------------------
; TFE / LWE
; --------------------------------------------------------------------

; The load returns a { <4 x float>, i32 } struct and only element 0 of the
; vector member is used. The expected output keeps the struct-returning call
; exactly as written in the input (same overload, same operands).
; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
  %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  %rgba = extractvalue { <4 x float>, i32 } %data, 0
  %elt0 = extractelement <4 x float> %rgba, i32 0
  ret float %elt0
}

declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1

; A plain <4 x float> load whose last i32 operand is 1, called with undef
; operands and an nsz flag. The expected output is the scalar f32 overload
; with a leading i32 operand of 1, returned directly.
; The function is anchored with a label directive like every other test in
; this file. The call carries no attribute-group reference because the
; attribute list at the end of this file defines only #0 and #1.
; CHECK-LABEL: @tfe_check_assert(
; CHECK: %data = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
; CHECK-NEXT: ret float %data
define amdgpu_hs float @tfe_check_assert() #0 {
  %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
  %elt0 = extractelement <4 x float> %data, i32 0
  ret float %elt0
}

declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

; #0 is applied to every test function; #1 to every intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }

!0 = !{float 2.500000e+00}