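; Codegen test for sub-dword loads that are expected to select to s_load_dword
; even though the AMDGPUCodeGenPrepare widening is switched off on the llc
; command line (-amdgpu-codegenprepare-widen-constant-loads=0).
; The tahiti run is checked with the GCN and SI prefixes, the tonga run with
; the GCN and VI prefixes.
; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
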
; Loads an i16 through the addrspace(4) pointer argument (align 4), adds 999,
; ORs in 4, and stores the i16 result through a null addrspace(1) pointer.
; The checks expect one s_load_dword whose result register is updated by
; s_addk_i32 with 0x3e7 (999) and then fed to s_or_b32 with 4.
; GCN-LABEL: {{^}}widen_i16_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_addk_i32 [[VAL]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 4
define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; Loads an i16 through the addrspace(4) pointer argument (align 4),
; zero-extends it to i32, adds 999, ORs in 4, and stores the i32 result
; through a null addrspace(1) pointer.
; The checks expect s_load_dword followed by s_and_b32 with 0xffff (the zero
; extension) before the s_addk_i32 / s_or_b32 pair.
; GCN-LABEL: {{^}}widen_i16_constant_load_zext_i32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[VAL]], 0xffff{{$}}
; GCN: s_addk_i32 [[TRUNC]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[TRUNC]], 4
define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %ext = zext i16 %load to i32
  %add = add i32 %ext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

; Loads an i16 through the addrspace(4) pointer argument (align 4),
; sign-extends it to i32, adds 999, ORs in 4, and stores the i32 result
; through a null addrspace(1) pointer.
; The checks expect s_load_dword followed by s_sext_i32_i16 (the sign
; extension) before the s_addk_i32 / s_or_b32 pair.
; GCN-LABEL: {{^}}widen_i16_constant_load_sext_i32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_sext_i32_i16 [[EXT:s[0-9]+]], [[VAL]]
; GCN: s_addk_i32 [[EXT]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[EXT]], 4
define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %ext = sext i16 %load to i32
  %add = add i32 %ext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

; Same shape as the i16 case but with the non-power-of-two type i17: loads an
; i17 through the addrspace(4) pointer argument (align 4), adds 34, ORs in 4,
; and stores the i17 result through a null addrspace(1) pointer.
; The checks expect s_load_dword, s_add_i32 with 34, s_or_b32 with 4, and then
; an s_bfe_u32 of the OR result with operand 0x10010.
; NOTE(review): 0x10010 presumably selects a 1-bit field at bit offset 16,
; i.e. the top bit of the i17 - confirm against the s_bfe_u32 operand encoding.
; GCN-LABEL: {{^}}widen_i17_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 34
; GCN: s_or_b32 [[OR:s[0-9]+]], [[ADD]], 4
; GCN: s_bfe_u32 s{{[0-9]+}}, [[OR]], 0x10010
define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) {
  %load = load i17, i17 addrspace(4)* %arg, align 4
  %add = add i17 %load, 34
  %or = or i17 %add, 4
  store i17 %or, i17 addrspace(1)* null
  ret void
}

; Loads a half through the addrspace(4) pointer argument (align 4), adds 4.0,
; and stores the half result through a null addrspace(1) pointer.
; Both targets are expected to use s_load_dword. The SI checks then expect a
; conversion to f32 (v_cvt_f32_f16_e32) followed by v_add_f32_e32, while the
; VI checks expect a single v_add_f16_e64 reading the loaded register.
; GCN-LABEL: {{^}}widen_f16_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[VAL]]
; SI: v_add_f32_e32 [[ADD:v[0-9]+]], 4.0, [[CVT]]

; VI: v_add_f16_e64 [[ADD:v[0-9]+]], [[VAL]], 4.0
define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) {
  %load = load half, half addrspace(4)* %arg, align 4
  %add = fadd half %load, 4.0
  store half %add, half addrspace(1)* null
  ret void
}

; Loads a <2 x i8> through the addrspace(4) pointer argument (align 4), adds
; <12, 44>, ORs in <4, 3>, and stores the vector through a null addrspace(1)
; pointer.
; Both targets are expected to use s_load_dword. The SI checks expect only
; scalar (s_*) arithmetic afterwards; the VI checks currently expect SDWA
; vector instructions (v_add_u32_sdwa, v_or_b32_sdwa, v_or_b32_e32), which is
; what the FIXME below refers to.
; FIXME: valu usage on VI
; GCN-LABEL: {{^}}widen_v2i8_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]

; SI: s_add_i32
; SI: s_or_b32
; SI: s_addk_i32
; SI: s_and_b32
; SI: s_or_b32
; SI: s_or_b32

; VI: s_add_i32
; VI: v_add_u32_sdwa
; VI: v_or_b32_sdwa
; VI: v_or_b32_e32
define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %arg, align 4
  %add = add <2 x i8> %load, <i8 12, i8 44>
  %or = or <2 x i8> %add, <i8 4, i8 3>
  store <2 x i8> %or, <2 x i8> addrspace(1)* null
  ret void
}

; Negative case: the i16 load address is the addrspace(4) pointer argument
; indexed by the workitem id (zero-extended to i64), so the address differs
; per workitem. The arithmetic and store match widen_i16_constant_load.
; The only check is that the load is emitted as buffer_load_ushort or
; flat_load_ushort, i.e. a 16-bit vector-memory load.
; GCN-LABEL: {{^}}no_widen_i16_constant_divergent_load:
; GCN: {{buffer|flat}}_load_ushort
define amdgpu_kernel void @no_widen_i16_constant_divergent_load(i16 addrspace(4)* %arg) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = zext i32 %tid to i64
  %gep.arg = getelementptr inbounds i16, i16 addrspace(4)* %arg, i64 %tid.ext
  %load = load i16, i16 addrspace(4)* %gep.arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; Loads an i1 through the addrspace(4) pointer argument (align 4), ANDs it
; with true, and stores the i1 result through a null addrspace(1) pointer.
; The checks expect s_load_dword followed by s_and_b32 of the loaded register
; with the immediate 1 (anchored at end of line).
; GCN-LABEL: {{^}}widen_i1_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 {{s[0-9]+}}, [[VAL]], 1{{$}}
define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) {
  %load = load i1, i1 addrspace(4)* %arg, align 4
  %and = and i1 %load, true
  store i1 %and, i1 addrspace(1)* null
  ret void
}

; Loads an i16 through the addrspace(4) pointer argument (align 4),
; zero-extends it to i32, adds 999, ORs in 4, and stores the i32 result
; through a null addrspace(1) pointer.
; The checks expect s_load_dword, s_and_b32 with 0xffff, s_addk_i32 with
; 0x3e7, and s_or_b32 with 4.
; NOTE(review): the function name says i64, but the body extends to i32 and
; stores an i32, which makes the IR and checks the same as
; widen_i16_constant_load_zext_i32. Confirm whether an i64 zext/add/store was
; intended; the expected instructions would need regenerating if so.
; GCN-LABEL: {{^}}widen_i16_zextload_i64_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[VAL]], 0xffff{{$}}
; GCN: s_addk_i32 [[TRUNC]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[TRUNC]], 4
define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %zext = zext i16 %load to i32
  %add = add i32 %zext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

; Loads an i1 through the addrspace(4) pointer argument (align 4),
; zero-extends it to i64, adds 999, and stores the i64 result through a null
; addrspace(1) pointer.
; The checks expect s_load_dword, s_and_b32 with 1 to isolate the bit, then a
; 64-bit add split into s_add_u32 with 0x3e7 (999) and s_addc_u32 of 0 and 0
; for the upper half.
; GCN-LABEL: {{^}}widen_i1_zext_to_i64_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 [[AND:s[0-9]+]], [[VAL]], 1
; GCN: s_add_u32 [[ADD:s[0-9]+]], [[AND]], 0x3e7
; GCN: s_addc_u32 s{{[0-9]+}}, 0, 0
define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* %arg) {
  %load = load i1, i1 addrspace(4)* %arg, align 4
  %zext = zext i1 %load to i64
  %add = add i64 %zext, 999
  store i64 %add, i64 addrspace(1)* null
  ret void
}

; Same arithmetic as widen_i16_constant_load, but the i16 is loaded through an
; addrspace(6) pointer argument (align 4) instead of addrspace(4).
; The checks expect the same sequence: s_load_dword, s_addk_i32 with 0x3e7,
; and s_or_b32 with 4.
; GCN-LABEL: {{^}}widen_i16_constant32_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_addk_i32 [[VAL]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 4
define amdgpu_kernel void @widen_i16_constant32_load(i16 addrspace(6)* %arg) {
  %load = load i16, i16 addrspace(6)* %arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; Loads an i16 through an addrspace(1) pointer argument (align 4) with the
; load tagged !invariant.load, adds 999, ORs in 1, and stores the i16 result
; through a null addrspace(1) pointer.
; The checks expect this load to be selected as s_load_dword as well,
; followed by s_addk_i32 with 0x3e7 and s_or_b32 with 1.
; GCN-LABEL: {{^}}widen_i16_global_invariant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_addk_i32 [[VAL]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 1
define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %arg) {
  %load = load i16, i16 addrspace(1)* %arg, align 4, !invariant.load !0
  %add = add i16 %load, 999
  %or = or i16 %add, 1
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; Intrinsic called by @no_widen_i16_constant_divergent_load to obtain the
; workitem id used as the load index.
declare i32 @llvm.amdgcn.workitem.id.x()

; Empty metadata node attached as !invariant.load in
; @widen_i16_global_invariant_load.
!0 = !{}
