; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-slh-lfence | FileCheck %s --check-prefix=X64-LFENCE
;
; FIXME: Add support for 32-bit and other EH ABIs.

declare void @leak(i32 %v1, i32 %v2)

declare void @sink(i32)

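; Test the simplest case: a load in the entry block. The hardened variant
; derives the predicate state from RSP, masks the loaded value with it, and
; merges it back into the high bits of RSP before returning.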
define i32 @test_trivial_entry_load(i32* %ptr) speculative_load_hardening {
; X64-LABEL: test_trivial_entry_load:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsp, %rcx
; X64-NEXT:    movq $-1, %rax
; X64-NEXT:    sarq $63, %rcx
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    shlq $47, %rcx
; X64-NEXT:    orq %rcx, %rsp
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_trivial_entry_load:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    movl (%rdi), %eax
; X64-LFENCE-NEXT:    retq
entry:
  %v = load i32, i32* %ptr
  ret i32 %v
}

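; Test hardening across a series of conditional branches: each edge updates
; the predicate state with a cmov keyed on the branch condition, so that on
; a misspeculated edge every subsequent load address and loaded value is
; poisoned.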
define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) speculative_load_hardening {
; X64-LABEL: test_basic_conditions:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %r15
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    pushq %r14
; X64-NEXT:    .cfi_def_cfa_offset 24
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    .cfi_offset %rbx, -32
; X64-NEXT:    .cfi_offset %r14, -24
; X64-NEXT:    .cfi_offset %r15, -16
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq $-1, %rbx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    jne .LBB1_1
; X64-NEXT:  # %bb.2: # %then1
; X64-NEXT:    cmovneq %rbx, %rax
; X64-NEXT:    testl %esi, %esi
; X64-NEXT:    je .LBB1_4
; X64-NEXT:  .LBB1_1:
; X64-NEXT:    cmoveq %rbx, %rax
; X64-NEXT:  .LBB1_8: # %exit
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 24
; X64-NEXT:    popq %r14
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    popq %r15
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_4: # %then2
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movq %r8, %r14
; X64-NEXT:    cmovneq %rbx, %rax
; X64-NEXT:    testl %edx, %edx
; X64-NEXT:    je .LBB1_6
; X64-NEXT:  # %bb.5: # %else3
; X64-NEXT:    cmoveq %rbx, %rax
; X64-NEXT:    movslq (%r9), %rcx
; X64-NEXT:    orq %rax, %rcx
; X64-NEXT:    leaq (%r14,%rcx,4), %r15
; X64-NEXT:    movl %ecx, (%r14,%rcx,4)
; X64-NEXT:    jmp .LBB1_7
; X64-NEXT:  .LBB1_6: # %then3
; X64-NEXT:    cmovneq %rbx, %rax
; X64-NEXT:    movl (%rcx), %ecx
; X64-NEXT:    addl (%r14), %ecx
; X64-NEXT:    movslq %ecx, %rdi
; X64-NEXT:    orq %rax, %rdi
; X64-NEXT:    movl (%r14,%rdi,4), %esi
; X64-NEXT:    orl %eax, %esi
; X64-NEXT:    movq (%r9), %r15
; X64-NEXT:    orq %rax, %r15
; X64-NEXT:    addl (%r15), %esi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    # kill: def $edi killed $edi killed $rdi
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq leak
; X64-NEXT:  .Lslh_ret_addr0:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr0, %rcx
; X64-NEXT:    cmovneq %rbx, %rax
; X64-NEXT:  .LBB1_7: # %merge
; X64-NEXT:    movslq (%r15), %rcx
; X64-NEXT:    orq %rax, %rcx
; X64-NEXT:    movl $0, (%r14,%rcx,4)
; X64-NEXT:    jmp .LBB1_8
;
; X64-LFENCE-LABEL: test_basic_conditions:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT:    pushq %rax
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT:    .cfi_offset %rbx, -24
; X64-LFENCE-NEXT:    .cfi_offset %r14, -16
; X64-LFENCE-NEXT:    testl %edi, %edi
; X64-LFENCE-NEXT:    jne .LBB1_6
; X64-LFENCE-NEXT:  # %bb.1: # %then1
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    testl %esi, %esi
; X64-LFENCE-NEXT:    jne .LBB1_6
; X64-LFENCE-NEXT:  # %bb.2: # %then2
; X64-LFENCE-NEXT:    movq %r8, %rbx
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    testl %edx, %edx
; X64-LFENCE-NEXT:    je .LBB1_3
; X64-LFENCE-NEXT:  # %bb.4: # %else3
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    movslq (%r9), %rax
; X64-LFENCE-NEXT:    leaq (%rbx,%rax,4), %r14
; X64-LFENCE-NEXT:    movl %eax, (%rbx,%rax,4)
; X64-LFENCE-NEXT:    jmp .LBB1_5
; X64-LFENCE-NEXT:  .LBB1_3: # %then3
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    movl (%rcx), %eax
; X64-LFENCE-NEXT:    addl (%rbx), %eax
; X64-LFENCE-NEXT:    movslq %eax, %rdi
; X64-LFENCE-NEXT:    movl (%rbx,%rdi,4), %esi
; X64-LFENCE-NEXT:    movq (%r9), %r14
; X64-LFENCE-NEXT:    addl (%r14), %esi
; X64-LFENCE-NEXT:    # kill: def $edi killed $edi killed $rdi
; X64-LFENCE-NEXT:    callq leak
; X64-LFENCE-NEXT:  .LBB1_5: # %merge
; X64-LFENCE-NEXT:    movslq (%r14), %rax
; X64-LFENCE-NEXT:    movl $0, (%rbx,%rax,4)
; X64-LFENCE-NEXT:  .LBB1_6: # %exit
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    addq $8, %rsp
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT:    retq
entry:
  %a.cmp = icmp eq i32 %a, 0
  br i1 %a.cmp, label %then1, label %exit

then1:
  %b.cmp = icmp eq i32 %b, 0
  br i1 %b.cmp, label %then2, label %exit

then2:
  %c.cmp = icmp eq i32 %c, 0
  br i1 %c.cmp, label %then3, label %else3

then3:
  %secret1 = load i32, i32* %ptr1
  %secret2 = load i32, i32* %ptr2
  %secret.sum1 = add i32 %secret1, %secret2
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret.sum1
  %secret3 = load i32, i32* %ptr2.idx
  %secret4 = load i32*, i32** %ptr3
  %secret5 = load i32, i32* %secret4
  %secret.sum2 = add i32 %secret3, %secret5
  call void @leak(i32 %secret.sum1, i32 %secret.sum2)
  br label %merge

else3:
  %secret6 = load i32*, i32** %ptr3
  %cast = ptrtoint i32* %secret6 to i32
  %ptr2.idx2 = getelementptr i32, i32* %ptr2, i32 %cast
  store i32 %cast, i32* %ptr2.idx2
  br label %merge

merge:
  %phi = phi i32* [ %secret4, %then3 ], [ %ptr2.idx2, %else3 ]
  %secret7 = load i32, i32* %phi
  %ptr2.idx3 = getelementptr i32, i32* %ptr2, i32 %secret7
  store i32 0, i32* %ptr2.idx3
  br label %exit

exit:
  ret void
}

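; Test hardening of a simple loop containing a call: the predicate state is
; stashed in the high bits of RSP across the call to @sink, then re-derived
; and checked against the expected return address label after it returns.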
define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind speculative_load_hardening {
; X64-LABEL: test_basic_loop:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbp
; X64-NEXT:    pushq %r15
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %r12
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq $-1, %r15
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB2_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    cmoveq %r15, %rax
; X64-NEXT:    jmp .LBB2_5
; X64-NEXT:  .LBB2_2: # %l.header.preheader
; X64-NEXT:    movq %rcx, %r14
; X64-NEXT:    movq %rdx, %r12
; X64-NEXT:    movl %esi, %ebp
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    xorl %ebx, %ebx
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB2_3: # %l.header
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movslq (%r12), %rcx
; X64-NEXT:    orq %rax, %rcx
; X64-NEXT:    movq %rax, %rdx
; X64-NEXT:    orq %r14, %rdx
; X64-NEXT:    movl (%rdx,%rcx,4), %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr1:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr1, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    incl %ebx
; X64-NEXT:    cmpl %ebp, %ebx
; X64-NEXT:    jge .LBB2_4
; X64-NEXT:  # %bb.6: # in Loop: Header=BB2_3 Depth=1
; X64-NEXT:    cmovgeq %r15, %rax
; X64-NEXT:    jmp .LBB2_3
; X64-NEXT:  .LBB2_4:
; X64-NEXT:    cmovlq %r15, %rax
; X64-NEXT:  .LBB2_5: # %exit
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r12
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %r15
; X64-NEXT:    popq %rbp
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_basic_loop:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %rbp
; X64-LFENCE-NEXT:    pushq %r15
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    pushq %rax
; X64-LFENCE-NEXT:    testl %edi, %edi
; X64-LFENCE-NEXT:    jne .LBB2_3
; X64-LFENCE-NEXT:  # %bb.1: # %l.header.preheader
; X64-LFENCE-NEXT:    movq %rcx, %r14
; X64-LFENCE-NEXT:    movq %rdx, %r15
; X64-LFENCE-NEXT:    movl %esi, %ebp
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    xorl %ebx, %ebx
; X64-LFENCE-NEXT:    .p2align 4, 0x90
; X64-LFENCE-NEXT:  .LBB2_2: # %l.header
; X64-LFENCE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    movslq (%r15), %rax
; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    incl %ebx
; X64-LFENCE-NEXT:    cmpl %ebp, %ebx
; X64-LFENCE-NEXT:    jl .LBB2_2
; X64-LFENCE-NEXT:  .LBB2_3: # %exit
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    addq $8, %rsp
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    popq %r15
; X64-LFENCE-NEXT:    popq %rbp
; X64-LFENCE-NEXT:    retq
entry:
  %a.cmp = icmp eq i32 %a, 0
  br i1 %a.cmp, label %l.header, label %exit

l.header:
  %i = phi i32 [ 0, %entry ], [ %i.next, %l.header ]
  %secret = load i32, i32* %ptr1
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret
  %leak = load i32, i32* %ptr2.idx
  call void @sink(i32 %leak)
  %i.next = add i32 %i, 1
  %i.cmp = icmp slt i32 %i.next, %b
  br i1 %i.cmp, label %l.header, label %exit

exit:
  ret void
}

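; Test the same loop hardening with two nested loops, where the predicate
; state has to be threaded through both loop headers, the inner loop's exit,
; and the outer loop's latch.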
define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2) nounwind speculative_load_hardening {
; X64-LABEL: test_basic_nested_loop:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbp
; X64-NEXT:    pushq %r15
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %r13
; X64-NEXT:    pushq %r12
; X64-NEXT:    pushq %rbx
; X64-NEXT:    pushq %rax
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq $-1, %rbp
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB3_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    cmoveq %rbp, %rax
; X64-NEXT:    jmp .LBB3_10
; X64-NEXT:  .LBB3_2: # %l1.header.preheader
; X64-NEXT:    movq %r8, %r14
; X64-NEXT:    movq %rcx, %rbx
; X64-NEXT:    movl %edx, %r12d
; X64-NEXT:    movl %esi, %r15d
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    xorl %r13d, %r13d
; X64-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT:    testl %r15d, %r15d
; X64-NEXT:    jle .LBB3_4
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_5: # %l2.header.preheader
; X64-NEXT:    cmovleq %rbp, %rax
; X64-NEXT:    xorl %r15d, %r15d
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_6: # %l2.header
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movslq (%rbx), %rcx
; X64-NEXT:    orq %rax, %rcx
; X64-NEXT:    movq %rax, %rdx
; X64-NEXT:    orq %r14, %rdx
; X64-NEXT:    movl (%rdx,%rcx,4), %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr2:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr2, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    incl %r15d
; X64-NEXT:    cmpl %r12d, %r15d
; X64-NEXT:    jge .LBB3_7
; X64-NEXT:  # %bb.11: # in Loop: Header=BB3_6 Depth=1
; X64-NEXT:    cmovgeq %rbp, %rax
; X64-NEXT:    jmp .LBB3_6
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_7:
; X64-NEXT:    cmovlq %rbp, %rax
; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
; X64-NEXT:    jmp .LBB3_8
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_4:
; X64-NEXT:    cmovgq %rbp, %rax
; X64-NEXT:  .LBB3_8: # %l1.latch
; X64-NEXT:    movslq (%rbx), %rcx
; X64-NEXT:    orq %rax, %rcx
; X64-NEXT:    movq %rax, %rdx
; X64-NEXT:    orq %r14, %rdx
; X64-NEXT:    movl (%rdx,%rcx,4), %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr3:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr3, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    incl %r13d
; X64-NEXT:    cmpl %r15d, %r13d
; X64-NEXT:    jge .LBB3_9
; X64-NEXT:  # %bb.12:
; X64-NEXT:    cmovgeq %rbp, %rax
; X64-NEXT:    testl %r15d, %r15d
; X64-NEXT:    jg .LBB3_5
; X64-NEXT:    jmp .LBB3_4
; X64-NEXT:  .LBB3_9:
; X64-NEXT:    cmovlq %rbp, %rax
; X64-NEXT:  .LBB3_10: # %exit
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r12
; X64-NEXT:    popq %r13
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %r15
; X64-NEXT:    popq %rbp
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_basic_nested_loop:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %rbp
; X64-LFENCE-NEXT:    pushq %r15
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %r13
; X64-LFENCE-NEXT:    pushq %r12
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    pushq %rax
; X64-LFENCE-NEXT:    testl %edi, %edi
; X64-LFENCE-NEXT:    je .LBB3_1
; X64-LFENCE-NEXT:  .LBB3_6: # %exit
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    addq $8, %rsp
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r12
; X64-LFENCE-NEXT:    popq %r13
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    popq %r15
; X64-LFENCE-NEXT:    popq %rbp
; X64-LFENCE-NEXT:    retq
; X64-LFENCE-NEXT:  .LBB3_1: # %l1.header.preheader
; X64-LFENCE-NEXT:    movq %r8, %r14
; X64-LFENCE-NEXT:    movq %rcx, %rbx
; X64-LFENCE-NEXT:    movl %edx, %r13d
; X64-LFENCE-NEXT:    movl %esi, %r15d
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    xorl %r12d, %r12d
; X64-LFENCE-NEXT:    jmp .LBB3_2
; X64-LFENCE-NEXT:    .p2align 4, 0x90
; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    movslq (%rbx), %rax
; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    incl %r12d
; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
; X64-LFENCE-NEXT:    jge .LBB3_6
; X64-LFENCE-NEXT:  .LBB3_2: # %l1.header
; X64-LFENCE-NEXT:    # =>This Loop Header: Depth=1
; X64-LFENCE-NEXT:    # Child Loop BB3_4 Depth 2
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    testl %r15d, %r15d
; X64-LFENCE-NEXT:    jle .LBB3_5
; X64-LFENCE-NEXT:  # %bb.3: # %l2.header.preheader
; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    xorl %ebp, %ebp
; X64-LFENCE-NEXT:    .p2align 4, 0x90
; X64-LFENCE-NEXT:  .LBB3_4: # %l2.header
; X64-LFENCE-NEXT:    # Parent Loop BB3_2 Depth=1
; X64-LFENCE-NEXT:    # => This Inner Loop Header: Depth=2
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    movslq (%rbx), %rax
; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    incl %ebp
; X64-LFENCE-NEXT:    cmpl %r13d, %ebp
; X64-LFENCE-NEXT:    jl .LBB3_4
; X64-LFENCE-NEXT:    jmp .LBB3_5
entry:
  %a.cmp = icmp eq i32 %a, 0
  br i1 %a.cmp, label %l1.header, label %exit

l1.header:
  %i = phi i32 [ 0, %entry ], [ %i.next, %l1.latch ]
  %b.cmp = icmp sgt i32 %b, 0
  br i1 %b.cmp, label %l2.header, label %l1.latch

l2.header:
  %j = phi i32 [ 0, %l1.header ], [ %j.next, %l2.header ]
  %secret = load i32, i32* %ptr1
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret
  %leak = load i32, i32* %ptr2.idx
  call void @sink(i32 %leak)
  %j.next = add i32 %j, 1
  %j.cmp = icmp slt i32 %j.next, %c
  br i1 %j.cmp, label %l2.header, label %l1.latch

l1.latch:
  %secret2 = load i32, i32* %ptr1
  %ptr2.idx2 = getelementptr i32, i32* %ptr2, i32 %secret2
  %leak2 = load i32, i32* %ptr2.idx2
  call void @sink(i32 %leak2)
  %i.next = add i32 %i, 1
  %i.cmp = icmp slt i32 %i.next, %b
  br i1 %i.cmp, label %l1.header, label %exit

exit:
  ret void
}

declare i32 @__gxx_personality_v0(...)

declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr

declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr

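; Test hardening of invoke/landingpad control flow: the predicate state is
; recomputed from RSP on entry to the landing pad and applied to the loads
; feeding @sink there.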
define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) speculative_load_hardening personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; X64-LABEL: test_basic_eh:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbp
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    pushq %r15
; X64-NEXT:    .cfi_def_cfa_offset 24
; X64-NEXT:    pushq %r14
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 40
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 48
; X64-NEXT:    .cfi_offset %rbx, -40
; X64-NEXT:    .cfi_offset %r14, -32
; X64-NEXT:    .cfi_offset %r15, -24
; X64-NEXT:    .cfi_offset %rbp, -16
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq $-1, %r15
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpl $41, %edi
; X64-NEXT:    jg .LBB4_1
; X64-NEXT:  # %bb.2: # %thrower
; X64-NEXT:    movq %rdx, %r14
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    cmovgq %r15, %rax
; X64-NEXT:    movslq %edi, %rcx
; X64-NEXT:    movl (%rsi,%rcx,4), %ebp
; X64-NEXT:    orl %eax, %ebp
; X64-NEXT:    movl $4, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq __cxa_allocate_exception
; X64-NEXT:  .Lslh_ret_addr4:
; X64-NEXT:    movq %rsp, %rcx
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    sarq $63, %rcx
; X64-NEXT:    cmpq $.Lslh_ret_addr4, %rdx
; X64-NEXT:    cmovneq %r15, %rcx
; X64-NEXT:    movl %ebp, (%rax)
; X64-NEXT:  .Ltmp0:
; X64-NEXT:    shlq $47, %rcx
; X64-NEXT:    movq %rax, %rdi
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    orq %rcx, %rsp
; X64-NEXT:    callq __cxa_throw
; X64-NEXT:  .Lslh_ret_addr5:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr5, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:  .Ltmp1:
; X64-NEXT:    jmp .LBB4_3
; X64-NEXT:  .LBB4_1:
; X64-NEXT:    cmovleq %r15, %rax
; X64-NEXT:  .LBB4_3: # %exit
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 40
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    popq %r14
; X64-NEXT:    .cfi_def_cfa_offset 24
; X64-NEXT:    popq %r15
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    popq %rbp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
; X64-NEXT:  .LBB4_4: # %lpad
; X64-NEXT:    .cfi_def_cfa_offset 48
; X64-NEXT:  .Ltmp2:
; X64-NEXT:    movq %rsp, %rcx
; X64-NEXT:    sarq $63, %rcx
; X64-NEXT:    movl (%rax), %eax
; X64-NEXT:    addl (%rbx), %eax
; X64-NEXT:    cltq
; X64-NEXT:    orq %rcx, %rax
; X64-NEXT:    movl (%r14,%rax,4), %edi
; X64-NEXT:    orl %ecx, %edi
; X64-NEXT:    shlq $47, %rcx
; X64-NEXT:    orq %rcx, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr6:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr6, %rcx
; X64-NEXT:    cmovneq %r15, %rax
;
; X64-LFENCE-LABEL: test_basic_eh:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %rbp
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT:    .cfi_offset %rbx, -32
; X64-LFENCE-NEXT:    .cfi_offset %r14, -24
; X64-LFENCE-NEXT:    .cfi_offset %rbp, -16
; X64-LFENCE-NEXT:    cmpl $41, %edi
; X64-LFENCE-NEXT:    jg .LBB4_2
; X64-LFENCE-NEXT:  # %bb.1: # %thrower
; X64-LFENCE-NEXT:    movq %rdx, %r14
; X64-LFENCE-NEXT:    movq %rsi, %rbx
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    movslq %edi, %rax
; X64-LFENCE-NEXT:    movl (%rsi,%rax,4), %ebp
; X64-LFENCE-NEXT:    movl $4, %edi
; X64-LFENCE-NEXT:    callq __cxa_allocate_exception
; X64-LFENCE-NEXT:    movl %ebp, (%rax)
; X64-LFENCE-NEXT:  .Ltmp0:
; X64-LFENCE-NEXT:    movq %rax, %rdi
; X64-LFENCE-NEXT:    xorl %esi, %esi
; X64-LFENCE-NEXT:    xorl %edx, %edx
; X64-LFENCE-NEXT:    callq __cxa_throw
; X64-LFENCE-NEXT:  .Ltmp1:
; X64-LFENCE-NEXT:  .LBB4_2: # %exit
; X64-LFENCE-NEXT:    lfence
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT:    popq %rbp
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT:    retq
; X64-LFENCE-NEXT:  .LBB4_3: # %lpad
; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT:  .Ltmp2:
; X64-LFENCE-NEXT:    movl (%rax), %eax
; X64-LFENCE-NEXT:    addl (%rbx), %eax
; X64-LFENCE-NEXT:    cltq
; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT:    callq sink
entry:
  %a.cmp = icmp slt i32 %a, 42
  br i1 %a.cmp, label %thrower, label %exit

thrower:
  %badidx = getelementptr i32, i32* %ptr1, i32 %a
  %secret1 = load i32, i32* %badidx
  %e.ptr = call i8* @__cxa_allocate_exception(i64 4)
  %e.ptr.cast = bitcast i8* %e.ptr to i32*
  store i32 %secret1, i32* %e.ptr.cast
  invoke void @__cxa_throw(i8* %e.ptr, i8* null, i8* null)
          to label %exit unwind label %lpad

exit:
  ret void

lpad:
  %e = landingpad { i8*, i32 }
          catch i8* null
  %e.catch.ptr = extractvalue { i8*, i32 } %e, 0
  %e.catch.ptr.cast = bitcast i8* %e.catch.ptr to i32*
  %secret1.catch = load i32, i32* %e.catch.ptr.cast
  %secret2 = load i32, i32* %ptr1
  %secret.sum = add i32 %secret1.catch, %secret2
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret.sum
  %leak = load i32, i32* %ptr2.idx
  call void @sink(i32 %leak)
  unreachable
}

declare void @sink_float(float)
declare void @sink_double(double)

; Test direct and converting loads of floating point values.
define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind speculative_load_hardening {
; X64-LABEL: test_fp_loads:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %r15
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %r13
; X64-NEXT:    pushq %r12
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq %rcx, %r15
; X64-NEXT:    movq %rdx, %r14
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    movq %rdi, %r12
; X64-NEXT:    movq $-1, %r13
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %r12
; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_float
; X64-NEXT:  .Lslh_ret_addr7:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr7, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    orq %rax, %rbx
; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_double
; X64-NEXT:  .Lslh_ret_addr8:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr8, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    cvtsd2ss %xmm0, %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_float
; X64-NEXT:  .Lslh_ret_addr9:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr9, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    cvtss2sd %xmm0, %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_double
; X64-NEXT:  .Lslh_ret_addr10:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr10, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    orq %rax, %r14
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    cvtsi2ssl (%r14), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_float
; X64-NEXT:  .Lslh_ret_addr11:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr11, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    orq %rax, %r15
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    cvtsi2sdq (%r15), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_double
; X64-NEXT:  .Lslh_ret_addr12:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr12, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    cvtsi2ssq (%r15), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_float
; X64-NEXT:  .Lslh_ret_addr13:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr13, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    cvtsi2sdl (%r14), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_double
; X64-NEXT:  .Lslh_ret_addr14:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr14, %rcx
; X64-NEXT:    cmovneq %r13, %rax
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r12
; X64-NEXT:    popq %r13
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %r15
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_fp_loads:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %r15
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %r12
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    pushq %rax
; X64-LFENCE-NEXT:    movq %rcx, %r15
; X64-LFENCE-NEXT:    movq %rdx, %r14
; X64-LFENCE-NEXT:    movq %rsi, %rbx
; X64-LFENCE-NEXT:    movq %rdi, %r12
; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-LFENCE-NEXT:    callq sink_float
; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-LFENCE-NEXT:    callq sink_double
; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-LFENCE-NEXT:    cvtsd2ss %xmm0, %xmm0
; X64-LFENCE-NEXT:    callq sink_float
; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-LFENCE-NEXT:    cvtss2sd %xmm0, %xmm0
; X64-LFENCE-NEXT:    callq sink_double
; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
; X64-LFENCE-NEXT:    cvtsi2ssl (%r14), %xmm0
; X64-LFENCE-NEXT:    callq sink_float
; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
; X64-LFENCE-NEXT:    cvtsi2sdq (%r15), %xmm0
; X64-LFENCE-NEXT:    callq sink_double
; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
; X64-LFENCE-NEXT:    cvtsi2ssq (%r15), %xmm0
; X64-LFENCE-NEXT:    callq sink_float
; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
; X64-LFENCE-NEXT:    cvtsi2sdl (%r14), %xmm0
; X64-LFENCE-NEXT:    callq sink_double
; X64-LFENCE-NEXT:    addq $8, %rsp
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r12
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    popq %r15
; X64-LFENCE-NEXT:    retq
entry:
  %f1 = load float, float* %fptr
  call void @sink_float(float %f1)
  %d1 = load double, double* %dptr
  call void @sink_double(double %d1)
  %f2.d = load double, double* %dptr
  %f2 = fptrunc double %f2.d to float
  call void @sink_float(float %f2)
  %d2.f = load float, float* %fptr
  %d2 = fpext float %d2.f to double
  call void @sink_double(double %d2)
  %f3.i = load i32, i32* %i32ptr
  %f3 = sitofp i32 %f3.i to float
  call void @sink_float(float %f3)
  %d3.i = load i64, i64* %i64ptr
  %d3 = sitofp i64 %d3.i to double
  call void @sink_double(double %d3)
  %f4.i = load i64, i64* %i64ptr
  %f4 = sitofp i64 %f4.i to float
  call void @sink_float(float %f4)
  %d4.i = load i32, i32* %i32ptr
  %d4 = sitofp i32 %d4.i to double
  call void @sink_double(double %d4)
  ret void
}

declare void @sink_v4f32(<4 x float>)
declare void @sink_v2f64(<2 x double>)
declare void @sink_v16i8(<16 x i8>)
declare void @sink_v8i16(<8 x i16>)
declare void @sink_v4i32(<4 x i32>)
declare void @sink_v2i64(<2 x i64>)

; Test loads of vectors.
define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind speculative_load_hardening {
; X64-LABEL: test_vec_loads:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbp
; X64-NEXT:    pushq %r15
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %r13
; X64-NEXT:    pushq %r12
; X64-NEXT:    pushq %rbx
; X64-NEXT:    pushq %rax
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq %r9, %r14
; X64-NEXT:    movq %r8, %r15
; X64-NEXT:    movq %rcx, %r12
; X64-NEXT:    movq %rdx, %r13
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    movq $-1, %rbp
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %rdi
; X64-NEXT:    movaps (%rdi), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v4f32
; X64-NEXT:  .Lslh_ret_addr15:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr15, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    orq %rax, %rbx
; X64-NEXT:    movaps (%rbx), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v2f64
; X64-NEXT:  .Lslh_ret_addr16:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr16, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    orq %rax, %r13
; X64-NEXT:    movaps (%r13), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v16i8
; X64-NEXT:  .Lslh_ret_addr17:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr17, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    orq %rax, %r12
; X64-NEXT:    movaps (%r12), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v8i16
; X64-NEXT:  .Lslh_ret_addr18:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr18, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    orq %rax, %r15
; X64-NEXT:    movaps (%r15), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v4i32
; X64-NEXT:  .Lslh_ret_addr19:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr19, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    orq %rax, %r14
; X64-NEXT:    movaps (%r14), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v2i64
; X64-NEXT:  .Lslh_ret_addr20:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr20, %rcx
; X64-NEXT:    cmovneq %rbp, %rax
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r12
; X64-NEXT:    popq %r13
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %r15
; X64-NEXT:    popq %rbp
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_vec_loads:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %r15
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %r13
; X64-LFENCE-NEXT:    pushq %r12
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    movq %r9, %r14
; X64-LFENCE-NEXT:    movq %r8, %r15
; X64-LFENCE-NEXT:    movq %rcx, %r12
; X64-LFENCE-NEXT:    movq %rdx, %r13
; X64-LFENCE-NEXT:    movq %rsi, %rbx
; X64-LFENCE-NEXT:    movaps (%rdi), %xmm0
; X64-LFENCE-NEXT:    callq sink_v4f32
; X64-LFENCE-NEXT:    movaps (%rbx), %xmm0
; X64-LFENCE-NEXT:    callq sink_v2f64
; X64-LFENCE-NEXT:    movaps (%r13), %xmm0
; X64-LFENCE-NEXT:    callq sink_v16i8
; X64-LFENCE-NEXT:    movaps (%r12), %xmm0
; X64-LFENCE-NEXT:    callq sink_v8i16
; X64-LFENCE-NEXT:    movaps (%r15), %xmm0
; X64-LFENCE-NEXT:    callq sink_v4i32
; X64-LFENCE-NEXT:    movaps (%r14), %xmm0
; X64-LFENCE-NEXT:    callq sink_v2i64
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r12
; X64-LFENCE-NEXT:    popq %r13
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    popq %r15
; X64-LFENCE-NEXT:    retq
entry:
  %x1 = load <4 x float>, <4 x float>* %v4f32ptr
  call void @sink_v4f32(<4 x float> %x1)
  %x2 = load <2 x double>, <2 x double>* %v2f64ptr
  call void @sink_v2f64(<2 x double> %x2)
  %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
  call void @sink_v16i8(<16 x i8> %x3)
  %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
  call void @sink_v8i16(<8 x i16> %x4)
  %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
  call void @sink_v4i32(<4 x i32> %x5)
  %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
  call void @sink_v2i64(<2 x i64> %x6)
  ret void
}

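; Test that hardening can be deferred from a load to the point where the
; loaded value actually escapes into a call: the OR with the predicate state
; is sunk past the intervening arithmetic, including trunc and narrow
; (sub-i32) integer ops.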
define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind speculative_load_hardening {
; X64-LABEL: test_deferred_hardening:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %r15
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq %rsi, %r14
; X64-NEXT:    movq %rdi, %rbx
; X64-NEXT:    movq $-1, %r15
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    movl (%rdi), %edi
; X64-NEXT:    incl %edi
; X64-NEXT:    imull %edx, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr21:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr21, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    movl (%rbx), %ecx
; X64-NEXT:    movl (%r14), %edx
; X64-NEXT:    leal 1(%rcx,%rdx), %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr22:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr22, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    movl (%rbx), %edi
; X64-NEXT:    shll $7, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr23:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr23, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    movswl (%rbx), %edi
; X64-NEXT:    shrl $7, %edi
; X64-NEXT:    notl %edi
; X64-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr24:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr24, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    movzwl (%rbx), %ecx
; X64-NEXT:    rolw $9, %cx
; X64-NEXT:    movswl %cx, %edi
; X64-NEXT:    negl %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:  .Lslh_ret_addr25:
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    cmpq $.Lslh_ret_addr25, %rcx
; X64-NEXT:    cmovneq %r15, %rax
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %r15
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_deferred_hardening:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    pushq %rax
; X64-LFENCE-NEXT:    movq %rsi, %r14
; X64-LFENCE-NEXT:    movq %rdi, %rbx
; X64-LFENCE-NEXT:    movl (%rdi), %edi
; X64-LFENCE-NEXT:    incl %edi
; X64-LFENCE-NEXT:    imull %edx, %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movl (%rbx), %eax
; X64-LFENCE-NEXT:    movl (%r14), %ecx
; X64-LFENCE-NEXT:    leal 1(%rax,%rcx), %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movl (%rbx), %edi
; X64-LFENCE-NEXT:    shll $7, %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movswl (%rbx), %edi
; X64-LFENCE-NEXT:    shrl $7, %edi
; X64-LFENCE-NEXT:    notl %edi
; X64-LFENCE-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
; X64-LFENCE-NEXT:    rolw $9, %ax
; X64-LFENCE-NEXT:    movswl %ax, %edi
; X64-LFENCE-NEXT:    negl %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    addq $8, %rsp
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    retq
entry:
  %a1 = load i32, i32* %ptr1
  %a2 = add i32 %a1, 1
  %a3 = mul i32 %a2, %x
  call void @sink(i32 %a3)
  %b1 = load i32, i32* %ptr1
  %b2 = add i32 %b1, 1
  %b3 = load i32, i32* %ptr2
  %b4 = add i32 %b2, %b3
  call void @sink(i32 %b4)
  %c1 = load i32, i32* %ptr1
  %c2 = shl i32 %c1, 7
  call void @sink(i32 %c2)
  %d1 = load i32, i32* %ptr1
  ; Check trunc and integer ops narrower than i32.
  %d2 = trunc i32 %d1 to i16
  %d3 = ashr i16 %d2, 7
  %d4 = zext i16 %d3 to i32
  %d5 = xor i32 %d4, -1
  call void @sink(i32 %d5)
  %e1 = load i32, i32* %ptr1
  %e2 = trunc i32 %e1 to i16
  %e3 = lshr i16 %e2, 7
  %e4 = shl i16 %e2, 9
  %e5 = or i16 %e3, %e4
  %e6 = sext i16 %e5 to i32
  %e7 = sub i32 0, %e6
  call void @sink(i32 %e7)
  ret void
}

; Make sure we don't crash on idempotent atomic operations which have a
; hardcoded reference to RSP+offset.
define void @idempotent_atomic(i32* %x) speculative_load_hardening {
; X64-LABEL: idempotent_atomic:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: idempotent_atomic:
; X64-LFENCE:       # %bb.0:
; X64-LFENCE-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-LFENCE-NEXT:    retq
  %tmp = atomicrmw or i32* %x, i32 0 seq_cst
  ret void
}