; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefixes=GCN,SICI,SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefixes=GCN,SICI,CI %s

; Check that an addrspace(1) (const) load with various combinations of
; uniform, nonuniform and constant address components all load with an
; addr64 mubuf with no readfirstlane.
;
; Test functions are named <array index>_<base pointer>[_<element index>],
; where each part says whether that address component is uniform, nonuniform
; or a constant. In these tests:
;   - a base of @indexable, or an argument marked inreg, plays the uniform role;
;   - an argument without inreg plays the nonuniform role
;     (presumably because amdgpu_ps passes inreg arguments in scalar registers
;     and the others in vector registers -- confirm against the AMDGPU
;     calling-convention documentation);
;   - "const" is a literal index.
;
; The <3 x float> loads are checked as dwordx4 on gfx600 and dwordx3 on gfx700;
; the single-float loads are checked as a plain dword on both.

; Constant table of six <3 x float> rows that the *_uniform* tests index into.
@indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [<3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>]

; Array index from %arg18 (not inreg), base is the global @indexable.
; GCN-LABEL: {{^}}nonuniform_uniform:
; GCN-NOT: readfirstlane
; SI: buffer_load_dwordx4 {{.*}} addr64
; CI: buffer_load_dwordx3 {{.*}} addr64

define amdgpu_ps float @nonuniform_uniform(i32 %arg18) {
.entry:
  %tmp31 = sext i32 %arg18 to i64
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Array index from %offset (inreg), base pointer built from %arg18 (not inreg).
; GCN-LABEL: {{^}}uniform_nonuniform:
; GCN-NOT: readfirstlane
; SI: buffer_load_dwordx4 {{.*}} addr64
; CI: buffer_load_dwordx3 {{.*}} addr64

define amdgpu_ps float @uniform_nonuniform(i32 inreg %offset, i32 %arg18) {
.entry:
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Array index is the literal 1, base pointer built from %arg18 (not inreg).
; GCN-LABEL: {{^}}const_nonuniform:
; GCN-NOT: readfirstlane
; SI: buffer_load_dwordx4 {{.*}} addr64
; CI: buffer_load_dwordx3 {{.*}} addr64

define amdgpu_ps float @const_nonuniform(i32 %arg18) {
.entry:
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 1
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Array index from %offset and base pointer from %arg18; neither is inreg.
; GCN-LABEL: {{^}}nonuniform_nonuniform:
; GCN-NOT: readfirstlane
; SI: buffer_load_dwordx4 {{.*}} addr64
; CI: buffer_load_dwordx3 {{.*}} addr64

define amdgpu_ps float @nonuniform_nonuniform(i32 %offset, i32 %arg18) {
.entry:
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Array index from %arg18 (not inreg), base is @indexable, element index is the
; literal 1, so only a single float is loaded.
; GCN-LABEL: {{^}}nonuniform_uniform_const:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dword {{.*}} addr64

define amdgpu_ps float @nonuniform_uniform_const(i32 %arg18) {
.entry:
  %tmp31 = sext i32 %arg18 to i64
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31, i64 1
  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
  ret float %tmp33
}

; Array index from %offset (inreg), base pointer built from %arg18 (not inreg),
; element index is the literal 1.
; GCN-LABEL: {{^}}uniform_nonuniform_const:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dword {{.*}} addr64

define amdgpu_ps float @uniform_nonuniform_const(i32 inreg %offset, i32 %arg18) {
.entry:
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1
  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
  ret float %tmp33
}

; Array index from %offset and base pointer from %arg18 (neither inreg),
; element index is the literal 1.
; GCN-LABEL: {{^}}nonuniform_nonuniform_const:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dword {{.*}} addr64

define amdgpu_ps float @nonuniform_nonuniform_const(i32 %offset, i32 %arg18) {
.entry:
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1
  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
  ret float %tmp33
}