• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; FIXME: -verify-machineinstrs currently fails on ppc64 (mismatched register/instruction).
; This is already checked for in Atomics-64.ll
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64

; In this file, we check that atomic load/store can make use of the indexed
; versions of the instructions.

; Indexed version of loads
; seq_cst atomic i8 load of element 90000 of %mem.
; Both runs expect the byte offset 90000 (lis 1 / ori 24464 = 65536 + 24464)
; to be built in r4, a sync ahead of the access, and the indexed load lbzx.
; After the load the 32-bit run expects lwsync, while the 64-bit run expects
; a cmpd / bne- / isync sequence on the loaded register.
define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
; PPC32-LABEL: load_x_i8_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    lis r4, 1
; PPC32-NEXT:    sync
; PPC32-NEXT:    ori r4, r4, 24464
; PPC32-NEXT:    lbzx r3, r3, r4
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_x_i8_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lis r4, 1
; PPC64-NEXT:    sync
; PPC64-NEXT:    ori r4, r4, 24464
; PPC64-NEXT:    lbzx r3, r3, r4
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
  %val = load atomic i8, i8* %ptr seq_cst, align 1
  ret i8 %val
}
; acquire atomic i16 load of element 90000 of %mem.
; Both runs expect the byte offset 180000 (lis 2 / ori 48928 = 131072 + 48928)
; to be built in r4 and the indexed load lhzx, with no barrier ahead of it.
; After the load the 32-bit run expects lwsync, while the 64-bit run expects
; a cmpd / bne- / isync sequence on the loaded register.
define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
; PPC32-LABEL: load_x_i16_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    lis r4, 2
; PPC32-NEXT:    ori r4, r4, 48928
; PPC32-NEXT:    lhzx r3, r3, r4
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_x_i16_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lis r4, 2
; PPC64-NEXT:    ori r4, r4, 48928
; PPC64-NEXT:    lhzx r3, r3, r4
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
  %val = load atomic i16, i16* %ptr acquire, align 2
  ret i16 %val
}
; monotonic atomic i32 load of element 90000 of %mem.
; Both runs share one set of checks: the byte offset 360000
; (lis 5 / ori 32320 = 327680 + 32320) is built in r4 and consumed by the
; indexed load lwzx, and no barrier instruction is expected.
define i32 @load_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: load_x_i32_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis r4, 5
; CHECK-NEXT:    ori r4, r4, 32320
; CHECK-NEXT:    lwzx r3, r3, r4
; CHECK-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
  %val = load atomic i32, i32* %ptr monotonic, align 4
  ret i32 %val
}
; unordered atomic i64 load of element 90000 of %mem (byte offset 720000).
; The 32-bit run expects no inline load: the address is formed in r3
; (addi -896 then addis 11, i.e. 720896 - 896), r4 is set to 0, and
; __atomic_load_8 is called inside a 16-byte frame that saves and restores lr.
; The 64-bit run expects the offset in r4 (lis 10 / ori 64640 = 655360 + 64640)
; and the indexed load ldx.
define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
; PPC32-LABEL: load_x_i64_unordered:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stw r0, 4(r1)
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    addi r3, r3, -896
; PPC32-NEXT:    addis r3, r3, 11
; PPC32-NEXT:    li r4, 0
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_x_i64_unordered:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lis r4, 10
; PPC64-NEXT:    ori r4, r4, 64640
; PPC64-NEXT:    ldx r3, r3, r4
; PPC64-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
  %val = load atomic i64, i64* %ptr unordered, align 8
  ret i64 %val
}
103
; Indexed version of stores
; seq_cst atomic i8 store of the constant 42 to element 90000 of %mem.
; Both runs share one set of checks: the byte offset 90000
; (lis 1 / ori 24464 = 65536 + 24464) is built in r4, 42 is loaded into r5,
; and a sync is expected immediately before the indexed store stbx.
define void @store_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: store_x_i8_seq_cst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis r4, 1
; CHECK-NEXT:    ori r4, r4, 24464
; CHECK-NEXT:    li r5, 42
; CHECK-NEXT:    sync
; CHECK-NEXT:    stbx r5, r3, r4
; CHECK-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
  store atomic i8 42, i8* %ptr seq_cst, align 1
  ret void
}
; release atomic i16 store of the constant 42 to element 90000 of %mem.
; Both runs share one set of checks: the byte offset 180000
; (lis 2 / ori 48928 = 131072 + 48928) is built in r4, 42 is loaded into r5,
; and an lwsync is expected immediately before the indexed store sthx.
define void @store_x_i16_release([100000 x i16]* %mem) {
; CHECK-LABEL: store_x_i16_release:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis r4, 2
; CHECK-NEXT:    ori r4, r4, 48928
; CHECK-NEXT:    li r5, 42
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    sthx r5, r3, r4
; CHECK-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
  store atomic i16 42, i16* %ptr release, align 2
  ret void
}
; monotonic atomic i32 store of the constant 42 to element 90000 of %mem.
; Both runs share one set of checks: the byte offset 360000
; (lis 5 / ori 32320 = 327680 + 32320) is built in r4, 42 is loaded into r5,
; and the indexed store stwx is expected with no barrier instruction.
define void @store_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: store_x_i32_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis r4, 5
; CHECK-NEXT:    ori r4, r4, 32320
; CHECK-NEXT:    li r5, 42
; CHECK-NEXT:    stwx r5, r3, r4
; CHECK-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
  store atomic i32 42, i32* %ptr monotonic, align 4
  ret void
}
; unordered atomic i64 store of the constant 42 to element 90000 of %mem
; (byte offset 720000).
; The 32-bit run expects no inline store: the address is formed in r3
; (addi -896 then addis 11, i.e. 720896 - 896), r5/r6/r7 are set to 0/42/0,
; and __atomic_store_8 is called inside a 16-byte frame that saves and
; restores lr. Presumably r5:r6 carry the 64-bit value and r7 the memory
; order; confirm against the libcall's signature.
; The 64-bit run expects the offset in r4 (lis 10 / ori 64640 = 655360 + 64640),
; 42 in r5, and the indexed store stdx.
define void @store_x_i64_unordered([100000 x i64]* %mem) {
; PPC32-LABEL: store_x_i64_unordered:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stw r0, 4(r1)
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    addi r3, r3, -896
; PPC32-NEXT:    addis r3, r3, 11
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    li r6, 42
; PPC32-NEXT:    li r7, 0
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_x_i64_unordered:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lis r4, 10
; PPC64-NEXT:    ori r4, r4, 64640
; PPC64-NEXT:    li r5, 42
; PPC64-NEXT:    stdx r5, r3, r4
; PPC64-NEXT:    blr
  %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
  store atomic i64 42, i64* %ptr unordered, align 8
  ret void
}
173