; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Type promotion of v3i32/v3f32 vector loads to v4i32/v4f32 is enabled only
; when the load is at least 8-byte aligned.
; Otherwise the load is split into two separate loads (dwordx2 + dword).
; Promoting a load with smaller alignment can cause a page fault, because the
; widened load accesses one extra dword beyond the end of the buffer.

; Kernel: loads a <3 x i32> through an addrspace(1) pointer with align 4 and
; stores it back out.
; Alignment is below 8 bytes, so the checks expect the load to stay split into
; a dwordx2 at offset 0x0 followed by a dword at offset 0x8, rather than being
; promoted to a single dwordx4.
; The destination register-range pattern accepts any SGPR index (one or more
; digits), so the check does not depend on a single-digit register number.
define protected amdgpu_kernel void @load_v3i32_align4(<3 x i32> addrspace(1)* %arg) #0 {
; GCN-LABEL: load_v3i32_align4:
; GCN:       ; %bb.0:
; GCN:         s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x0
; GCN-NEXT:    s_load_dword s{{[0-9]+}}, s[0:1], 0x8
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 4
  ; The store target is undef; presumably it exists only so the load has a use.
  store <3 x i32> %vec, <3 x i32> addrspace(1)* undef, align 4
  ret void
}

; Kernel: loads a <3 x i32> through an addrspace(1) pointer with align 8 and
; stores it back out.
; Alignment is 8 bytes, so the checks expect the load to be promoted to a
; single dwordx4 at offset 0x0.
; The destination register-range pattern accepts any SGPR index (one or more
; digits), so the check does not depend on a single-digit register number.
define protected amdgpu_kernel void @load_v3i32_align8(<3 x i32> addrspace(1)* %arg) #0 {
; GCN-LABEL: load_v3i32_align8:
; GCN:       ; %bb.0:
; GCN:         s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x0
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 8
  ; The store target is undef; presumably it exists only so the load has a use.
  store <3 x i32> %vec, <3 x i32> addrspace(1)* undef, align 8
  ret void
}

; Kernel: loads a <3 x i32> through an addrspace(1) pointer with align 16 and
; stores it back out.
; Alignment is above the 8-byte threshold, so the checks expect the load to be
; promoted to a single dwordx4 at offset 0x0.
; The destination register-range pattern accepts any SGPR index (one or more
; digits), so the check does not depend on a single-digit register number.
define protected amdgpu_kernel void @load_v3i32_align16(<3 x i32> addrspace(1)* %arg) #0 {
; GCN-LABEL: load_v3i32_align16:
; GCN:       ; %bb.0:
; GCN:         s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x0
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
  ; The store target is undef; presumably it exists only so the load has a use.
  store <3 x i32> %vec, <3 x i32> addrspace(1)* undef, align 16
  ret void
}

; Kernel: loads a <3 x float> through an addrspace(1) pointer with align 4 and
; stores it back out.
; Alignment is below 8 bytes, so the checks expect the load to stay split into
; a dwordx2 at offset 0x0 followed by a dword at offset 0x8, rather than being
; promoted to a single dwordx4.
; The destination register-range pattern accepts any SGPR index (one or more
; digits), so the check does not depend on a single-digit register number.
define protected amdgpu_kernel void @load_v3f32_align4(<3 x float> addrspace(1)* %arg) #0 {
; GCN-LABEL: load_v3f32_align4:
; GCN:       ; %bb.0:
; GCN:         s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x0
; GCN-NEXT:    s_load_dword s{{[0-9]+}}, s[0:1], 0x8
  %vec = load <3 x float>, <3 x float> addrspace(1)* %arg, align 4
  ; The store target is undef; presumably it exists only so the load has a use.
  store <3 x float> %vec, <3 x float> addrspace(1)* undef, align 4
  ret void
}

; Kernel: loads a <3 x float> through an addrspace(1) pointer with align 8 and
; stores it back out.
; Alignment is 8 bytes, so the checks expect the load to be promoted to a
; single dwordx4 at offset 0x0.
; The destination register-range pattern accepts any SGPR index (one or more
; digits), so the check does not depend on a single-digit register number.
define protected amdgpu_kernel void @load_v3f32_align8(<3 x float> addrspace(1)* %arg) #0 {
; GCN-LABEL: load_v3f32_align8:
; GCN:       ; %bb.0:
; GCN:         s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x0
  %vec = load <3 x float>, <3 x float> addrspace(1)* %arg, align 8
  ; The store target is undef; presumably it exists only so the load has a use.
  store <3 x float> %vec, <3 x float> addrspace(1)* undef, align 8
  ret void
}

; Kernel: loads a <3 x float> through an addrspace(1) pointer with align 16
; and stores it back out.
; Alignment is above the 8-byte threshold, so the checks expect the load to be
; promoted to a single dwordx4 at offset 0x0.
; The destination register-range pattern accepts any SGPR index (one or more
; digits), so the check does not depend on a single-digit register number.
define protected amdgpu_kernel void @load_v3f32_align16(<3 x float> addrspace(1)* %arg) #0 {
; GCN-LABEL: load_v3f32_align16:
; GCN:       ; %bb.0:
; GCN:         s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x0
  %vec = load <3 x float>, <3 x float> addrspace(1)* %arg, align 16
  ; The store target is undef; presumably it exists only so the load has a use.
  store <3 x float> %vec, <3 x float> addrspace(1)* undef, align 16
  ret void
}

; Attribute group #0, referenced by the kernel definitions in this file.
attributes #0 = { nounwind noinline }