; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-slh-lfence | FileCheck %s --check-prefix=X64-LFENCE
;
; FIXME: Add support for 32-bit and other EH ABIs.

declare void @leak(i32 %v1, i32 %v2)

declare void @sink(i32)

define i32 @test_trivial_entry_load(i32* %ptr) speculative_load_hardening {
; X64-LABEL: test_trivial_entry_load:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sarq $63, %rcx
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: orl %ecx, %eax
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: orq %rcx, %rsp
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_trivial_entry_load:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: movl (%rdi), %eax
; X64-LFENCE-NEXT: retq
entry:
  %v = load i32, i32* %ptr
  ret i32 %v
}

define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) speculative_load_hardening {
; X64-LABEL: test_basic_conditions:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %r15
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r14
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: .cfi_offset %rbx, -32
; X64-NEXT: .cfi_offset %r14, -24
; X64-NEXT: .cfi_offset %r15, -16
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rbx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: jne .LBB1_1
; X64-NEXT: # %bb.2: # %then1
; X64-NEXT: cmovneq %rbx, %rax
; X64-NEXT: testl %esi, %esi
; X64-NEXT: je .LBB1_4
; X64-NEXT: .LBB1_1:
; X64-NEXT: cmoveq %rbx, %rax
; X64-NEXT: .LBB1_8: # %exit
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r14
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %r15
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
; X64-NEXT: .LBB1_4: # %then2
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movq %r8, %r14
; X64-NEXT: cmovneq %rbx, %rax
; X64-NEXT: testl %edx, %edx
; X64-NEXT: je .LBB1_6
; X64-NEXT: # %bb.5: # %else3
; X64-NEXT: cmoveq %rbx, %rax
; X64-NEXT: movslq (%r9), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: leaq (%r14,%rcx,4), %r15
; X64-NEXT: movl %ecx, (%r14,%rcx,4)
; X64-NEXT: jmp .LBB1_7
; X64-NEXT: .LBB1_6: # %then3
; X64-NEXT: cmovneq %rbx, %rax
; X64-NEXT: movl (%rcx), %ecx
; X64-NEXT: addl (%r14), %ecx
; X64-NEXT: movslq %ecx, %rdi
; X64-NEXT: orq %rax, %rdi
; X64-NEXT: movl (%r14,%rdi,4), %esi
; X64-NEXT: orl %eax, %esi
; X64-NEXT: movq (%r9), %r15
; X64-NEXT: orq %rax, %r15
; X64-NEXT: addl (%r15), %esi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: # kill: def $edi killed $edi killed $rdi
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq leak
; X64-NEXT: .Lslh_ret_addr0:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr0, %rcx
; X64-NEXT: cmovneq %rbx, %rax
; X64-NEXT: .LBB1_7: # %merge
; X64-NEXT: movslq (%r15), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movl $0, (%r14,%rcx,4)
; X64-NEXT: jmp .LBB1_8
;
; X64-LFENCE-LABEL: test_basic_conditions:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT: .cfi_offset %rbx, -24
; X64-LFENCE-NEXT: .cfi_offset %r14, -16
; X64-LFENCE-NEXT: testl %edi, %edi
; X64-LFENCE-NEXT: jne .LBB1_6
; X64-LFENCE-NEXT: # %bb.1: # %then1
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: testl %esi, %esi
; X64-LFENCE-NEXT: jne .LBB1_6
; X64-LFENCE-NEXT: # %bb.2: # %then2
; X64-LFENCE-NEXT: movq %r8, %rbx
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: testl %edx, %edx
; X64-LFENCE-NEXT: je .LBB1_3
; X64-LFENCE-NEXT: # %bb.4: # %else3
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: movslq (%r9), %rax
; X64-LFENCE-NEXT: leaq (%rbx,%rax,4), %r14
; X64-LFENCE-NEXT: movl %eax, (%rbx,%rax,4)
; X64-LFENCE-NEXT: jmp .LBB1_5
; X64-LFENCE-NEXT: .LBB1_3: # %then3
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: movl (%rcx), %eax
; X64-LFENCE-NEXT: addl (%rbx), %eax
; X64-LFENCE-NEXT: movslq %eax, %rdi
; X64-LFENCE-NEXT: movl (%rbx,%rdi,4), %esi
; X64-LFENCE-NEXT: movq (%r9), %r14
; X64-LFENCE-NEXT: addl (%r14), %esi
; X64-LFENCE-NEXT: # kill: def $edi killed $edi killed $rdi
; X64-LFENCE-NEXT: callq leak
; X64-LFENCE-NEXT: .LBB1_5: # %merge
; X64-LFENCE-NEXT: movslq (%r14), %rax
; X64-LFENCE-NEXT: movl $0, (%rbx,%rax,4)
; X64-LFENCE-NEXT: .LBB1_6: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT: retq
entry:
  %a.cmp = icmp eq i32 %a, 0
  br i1 %a.cmp, label %then1, label %exit

then1:
  %b.cmp = icmp eq i32 %b, 0
  br i1 %b.cmp, label %then2, label %exit

then2:
  %c.cmp = icmp eq i32 %c, 0
  br i1 %c.cmp, label %then3, label %else3

then3:
  %secret1 = load i32, i32* %ptr1
  %secret2 = load i32, i32* %ptr2
  %secret.sum1 = add i32 %secret1, %secret2
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret.sum1
  %secret3 = load i32, i32* %ptr2.idx
  %secret4 = load i32*, i32** %ptr3
  %secret5 = load i32, i32* %secret4
  %secret.sum2 = add i32 %secret3, %secret5
  call void @leak(i32 %secret.sum1, i32 %secret.sum2)
  br label %merge

else3:
  %secret6 = load i32*, i32** %ptr3
  %cast = ptrtoint i32* %secret6 to i32
  %ptr2.idx2 = getelementptr i32, i32* %ptr2, i32 %cast
  store i32 %cast, i32* %ptr2.idx2
  br label %merge

merge:
  %phi = phi i32* [ %secret4, %then3 ], [ %ptr2.idx2, %else3 ]
  %secret7 = load i32, i32* %phi
  %ptr2.idx3 = getelementptr i32, i32* %ptr2, i32 %secret7
  store i32 0, i32* %ptr2.idx3
  br label %exit

exit:
  ret void
}

define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind speculative_load_hardening {
; X64-LABEL: test_basic_loop:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %r15
; X64-NEXT: sarq $63, %rax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB2_2
; X64-NEXT: # %bb.1:
; X64-NEXT: cmoveq %r15, %rax
; X64-NEXT: jmp .LBB2_5
; X64-NEXT: .LBB2_2: # %l.header.preheader
; X64-NEXT: movq %rcx, %r14
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movl %esi, %ebp
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB2_3: # %l.header
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movslq (%r12), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: orq %r14, %rdx
; X64-NEXT: movl (%rdx,%rcx,4), %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr1:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr1, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: incl %ebx
; X64-NEXT: cmpl %ebp, %ebx
; X64-NEXT: jge .LBB2_4
; X64-NEXT: # %bb.6: # in Loop: Header=BB2_3 Depth=1
; X64-NEXT: cmovgeq %r15, %rax
; X64-NEXT: jmp .LBB2_3
; X64-NEXT: .LBB2_4:
; X64-NEXT: cmovlq %r15, %rax
; X64-NEXT: .LBB2_5: # %exit
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_basic_loop:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %rbp
; X64-LFENCE-NEXT: pushq %r15
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: testl %edi, %edi
; X64-LFENCE-NEXT: jne .LBB2_3
; X64-LFENCE-NEXT: # %bb.1: # %l.header.preheader
; X64-LFENCE-NEXT: movq %rcx, %r14
; X64-LFENCE-NEXT: movq %rdx, %r15
; X64-LFENCE-NEXT: movl %esi, %ebp
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: xorl %ebx, %ebx
; X64-LFENCE-NEXT: .p2align 4, 0x90
; X64-LFENCE-NEXT: .LBB2_2: # %l.header
; X64-LFENCE-NEXT: # =>This Inner Loop Header: Depth=1
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: movslq (%r15), %rax
; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: incl %ebx
; X64-LFENCE-NEXT: cmpl %ebp, %ebx
; X64-LFENCE-NEXT: jl .LBB2_2
; X64-LFENCE-NEXT: .LBB2_3: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: popq %rbp
; X64-LFENCE-NEXT: retq
entry:
  %a.cmp = icmp eq i32 %a, 0
  br i1 %a.cmp, label %l.header, label %exit

l.header:
  %i = phi i32 [ 0, %entry ], [ %i.next, %l.header ]
  %secret = load i32, i32* %ptr1
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret
  %leak = load i32, i32* %ptr2.idx
  call void @sink(i32 %leak)
  %i.next = add i32 %i, 1
  %i.cmp = icmp slt i32 %i.next, %b
  br i1 %i.cmp, label %l.header, label %exit

exit:
  ret void
}

define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2) nounwind speculative_load_hardening {
; X64-LABEL: test_basic_nested_loop:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rbp
; X64-NEXT: sarq $63, %rax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB3_2
; X64-NEXT: # %bb.1:
; X64-NEXT: cmoveq %rbp, %rax
; X64-NEXT: jmp .LBB3_10
; X64-NEXT: .LBB3_2: # %l1.header.preheader
; X64-NEXT: movq %r8, %r14
; X64-NEXT: movq %rcx, %rbx
; X64-NEXT: movl %edx, %r12d
; X64-NEXT: movl %esi, %r15d
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: xorl %r13d, %r13d
; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: testl %r15d, %r15d
; X64-NEXT: jle .LBB3_4
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_5: # %l2.header.preheader
; X64-NEXT: cmovleq %rbp, %rax
; X64-NEXT: xorl %r15d, %r15d
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_6: # %l2.header
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movslq (%rbx), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: orq %r14, %rdx
; X64-NEXT: movl (%rdx,%rcx,4), %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr2:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr2, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: incl %r15d
; X64-NEXT: cmpl %r12d, %r15d
; X64-NEXT: jge .LBB3_7
; X64-NEXT: # %bb.11: # in Loop: Header=BB3_6 Depth=1
; X64-NEXT: cmovgeq %rbp, %rax
; X64-NEXT: jmp .LBB3_6
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_7:
; X64-NEXT: cmovlq %rbp, %rax
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
; X64-NEXT: jmp .LBB3_8
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_4:
; X64-NEXT: cmovgq %rbp, %rax
; X64-NEXT: .LBB3_8: # %l1.latch
; X64-NEXT: movslq (%rbx), %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: orq %r14, %rdx
; X64-NEXT: movl (%rdx,%rcx,4), %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr3:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr3, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: incl %r13d
; X64-NEXT: cmpl %r15d, %r13d
; X64-NEXT: jge .LBB3_9
; X64-NEXT: # %bb.12:
; X64-NEXT: cmovgeq %rbp, %rax
; X64-NEXT: testl %r15d, %r15d
; X64-NEXT: jg .LBB3_5
; X64-NEXT: jmp .LBB3_4
; X64-NEXT: .LBB3_9:
; X64-NEXT: cmovlq %rbp, %rax
; X64-NEXT: .LBB3_10: # %exit
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_basic_nested_loop:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %rbp
; X64-LFENCE-NEXT: pushq %r15
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %r13
; X64-LFENCE-NEXT: pushq %r12
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: testl %edi, %edi
; X64-LFENCE-NEXT: je .LBB3_1
; X64-LFENCE-NEXT: .LBB3_6: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r13
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: popq %rbp
; X64-LFENCE-NEXT: retq
; X64-LFENCE-NEXT: .LBB3_1: # %l1.header.preheader
; X64-LFENCE-NEXT: movq %r8, %r14
; X64-LFENCE-NEXT: movq %rcx, %rbx
; X64-LFENCE-NEXT: movl %edx, %r13d
; X64-LFENCE-NEXT: movl %esi, %r15d
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: xorl %r12d, %r12d
; X64-LFENCE-NEXT: jmp .LBB3_2
; X64-LFENCE-NEXT: .p2align 4, 0x90
; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch
; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: movslq (%rbx), %rax
; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: incl %r12d
; X64-LFENCE-NEXT: cmpl %r15d, %r12d
; X64-LFENCE-NEXT: jge .LBB3_6
; X64-LFENCE-NEXT: .LBB3_2: # %l1.header
; X64-LFENCE-NEXT: # =>This Loop Header: Depth=1
; X64-LFENCE-NEXT: # Child Loop BB3_4 Depth 2
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: testl %r15d, %r15d
; X64-LFENCE-NEXT: jle .LBB3_5
; X64-LFENCE-NEXT: # %bb.3: # %l2.header.preheader
; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: xorl %ebp, %ebp
; X64-LFENCE-NEXT: .p2align 4, 0x90
; X64-LFENCE-NEXT: .LBB3_4: # %l2.header
; X64-LFENCE-NEXT: # Parent Loop BB3_2 Depth=1
; X64-LFENCE-NEXT: # => This Inner Loop Header: Depth=2
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: movslq (%rbx), %rax
; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: incl %ebp
; X64-LFENCE-NEXT: cmpl %r13d, %ebp
; X64-LFENCE-NEXT: jl .LBB3_4
; X64-LFENCE-NEXT: jmp .LBB3_5
entry:
  %a.cmp = icmp eq i32 %a, 0
  br i1 %a.cmp, label %l1.header, label %exit

l1.header:
  %i = phi i32 [ 0, %entry ], [ %i.next, %l1.latch ]
  %b.cmp = icmp sgt i32 %b, 0
  br i1 %b.cmp, label %l2.header, label %l1.latch

l2.header:
  %j = phi i32 [ 0, %l1.header ], [ %j.next, %l2.header ]
  %secret = load i32, i32* %ptr1
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret
  %leak = load i32, i32* %ptr2.idx
  call void @sink(i32 %leak)
  %j.next = add i32 %j, 1
  %j.cmp = icmp slt i32 %j.next, %c
  br i1 %j.cmp, label %l2.header, label %l1.latch

l1.latch:
  %secret2 = load i32, i32* %ptr1
  %ptr2.idx2 = getelementptr i32, i32* %ptr2, i32 %secret2
  %leak2 = load i32, i32* %ptr2.idx2
  call void @sink(i32 %leak2)
  %i.next = add i32 %i, 1
  %i.cmp = icmp slt i32 %i.next, %b
  br i1 %i.cmp, label %l1.header, label %exit

exit:
  ret void
}

declare i32 @__gxx_personality_v0(...)

declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr

declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr

define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) speculative_load_hardening personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; X64-LABEL: test_basic_eh:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r15
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: pushq %r14
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 40
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 48
; X64-NEXT: .cfi_offset %rbx, -40
; X64-NEXT: .cfi_offset %r14, -32
; X64-NEXT: .cfi_offset %r15, -24
; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %r15
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpl $41, %edi
; X64-NEXT: jg .LBB4_1
; X64-NEXT: # %bb.2: # %thrower
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: cmovgq %r15, %rax
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: movl (%rsi,%rcx,4), %ebp
; X64-NEXT: orl %eax, %ebp
; X64-NEXT: movl $4, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq __cxa_allocate_exception
; X64-NEXT: .Lslh_ret_addr4:
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; X64-NEXT: sarq $63, %rcx
; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx
; X64-NEXT: cmovneq %r15, %rcx
; X64-NEXT: movl %ebp, (%rax)
; X64-NEXT: .Ltmp0:
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: orq %rcx, %rsp
; X64-NEXT: callq __cxa_throw
; X64-NEXT: .Lslh_ret_addr5:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: .Ltmp1:
; X64-NEXT: jmp .LBB4_3
; X64-NEXT: .LBB4_1:
; X64-NEXT: cmovleq %r15, %rax
; X64-NEXT: .LBB4_3: # %exit
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: addq $8, %rsp
; X64-NEXT: .cfi_def_cfa_offset 40
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: popq %r14
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r15
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %rbp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
; X64-NEXT: .LBB4_4: # %lpad
; X64-NEXT: .cfi_def_cfa_offset 48
; X64-NEXT: .Ltmp2:
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: sarq $63, %rcx
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: addl (%rbx), %eax
; X64-NEXT: cltq
; X64-NEXT: orq %rcx, %rax
; X64-NEXT: movl (%r14,%rax,4), %edi
; X64-NEXT: orl %ecx, %edi
; X64-NEXT: shlq $47, %rcx
; X64-NEXT: orq %rcx, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr6:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr6, %rcx
; X64-NEXT: cmovneq %r15, %rax
;
; X64-LFENCE-LABEL: test_basic_eh:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %rbp
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT: .cfi_offset %rbx, -32
; X64-LFENCE-NEXT: .cfi_offset %r14, -24
; X64-LFENCE-NEXT: .cfi_offset %rbp, -16
; X64-LFENCE-NEXT: cmpl $41, %edi
; X64-LFENCE-NEXT: jg .LBB4_2
; X64-LFENCE-NEXT: # %bb.1: # %thrower
; X64-LFENCE-NEXT: movq %rdx, %r14
; X64-LFENCE-NEXT: movq %rsi, %rbx
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: movslq %edi, %rax
; X64-LFENCE-NEXT: movl (%rsi,%rax,4), %ebp
; X64-LFENCE-NEXT: movl $4, %edi
; X64-LFENCE-NEXT: callq __cxa_allocate_exception
; X64-LFENCE-NEXT: movl %ebp, (%rax)
; X64-LFENCE-NEXT: .Ltmp0:
; X64-LFENCE-NEXT: movq %rax, %rdi
; X64-LFENCE-NEXT: xorl %esi, %esi
; X64-LFENCE-NEXT: xorl %edx, %edx
; X64-LFENCE-NEXT: callq __cxa_throw
; X64-LFENCE-NEXT: .Ltmp1:
; X64-LFENCE-NEXT: .LBB4_2: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: popq %rbp
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT: retq
; X64-LFENCE-NEXT: .LBB4_3: # %lpad
; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT: .Ltmp2:
; X64-LFENCE-NEXT: movl (%rax), %eax
; X64-LFENCE-NEXT: addl (%rbx), %eax
; X64-LFENCE-NEXT: cltq
; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi
; X64-LFENCE-NEXT: callq sink
entry:
  %a.cmp = icmp slt i32 %a, 42
  br i1 %a.cmp, label %thrower, label %exit

thrower:
  %badidx = getelementptr i32, i32* %ptr1, i32 %a
  %secret1 = load i32, i32* %badidx
  %e.ptr = call i8* @__cxa_allocate_exception(i64 4)
  %e.ptr.cast = bitcast i8* %e.ptr to i32*
  store i32 %secret1, i32* %e.ptr.cast
  invoke void @__cxa_throw(i8* %e.ptr, i8* null, i8* null)
          to label %exit unwind label %lpad

exit:
  ret void

lpad:
  %e = landingpad { i8*, i32 }
          catch i8* null
  %e.catch.ptr = extractvalue { i8*, i32 } %e, 0
  %e.catch.ptr.cast = bitcast i8* %e.catch.ptr to i32*
  %secret1.catch = load i32, i32* %e.catch.ptr.cast
  %secret2 = load i32, i32* %ptr1
  %secret.sum = add i32 %secret1.catch, %secret2
  %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret.sum
  %leak = load i32, i32* %ptr2.idx
  call void @sink(i32 %leak)
  unreachable
}

declare void @sink_float(float)
declare void @sink_double(double)

; Test direct and converting loads of floating point values.
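; In the X64 (SLH) run the pointer operands are hardened by OR-ing in the
; predicate state (e.g. orq %rax, %r12) before the loads and conversions,
; rather than masking the loaded FP values themselves.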
define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind speculative_load_hardening {
; X64-LABEL: test_fp_loads:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq %rcx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq %rdi, %r12
; X64-NEXT: movq $-1, %r13
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %r12
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: .Lslh_ret_addr7:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr7, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: orq %rax, %rbx
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: .Lslh_ret_addr8:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr8, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: cvtsd2ss %xmm0, %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: .Lslh_ret_addr9:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr9, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: cvtss2sd %xmm0, %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: .Lslh_ret_addr10:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr10, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: orq %rax, %r14
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2ssl (%r14), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: .Lslh_ret_addr11:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr11, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: orq %rax, %r15
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2sdq (%r15), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: .Lslh_ret_addr12:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr12, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2ssq (%r15), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_float
; X64-NEXT: .Lslh_ret_addr13:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr13, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtsi2sdl (%r14), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_double
; X64-NEXT: .Lslh_ret_addr14:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr14, %rcx
; X64-NEXT: cmovneq %r13, %rax
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_fp_loads:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %r15
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %r12
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: movq %rcx, %r15
; X64-LFENCE-NEXT: movq %rdx, %r14
; X64-LFENCE-NEXT: movq %rsi, %rbx
; X64-LFENCE-NEXT: movq %rdi, %r12
; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-LFENCE-NEXT: cvtsd2ss %xmm0, %xmm0
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-LFENCE-NEXT: cvtss2sd %xmm0, %xmm0
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2ssl (%r14), %xmm0
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2sdq (%r15), %xmm0
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2ssq (%r15), %xmm0
; X64-LFENCE-NEXT: callq sink_float
; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
; X64-LFENCE-NEXT: cvtsi2sdl (%r14), %xmm0
; X64-LFENCE-NEXT: callq sink_double
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: retq
entry:
  %f1 = load float, float* %fptr
  call void @sink_float(float %f1)
  %d1 = load double, double* %dptr
  call void @sink_double(double %d1)
  %f2.d = load double, double* %dptr
  %f2 = fptrunc double %f2.d to float
  call void @sink_float(float %f2)
  %d2.f = load float, float* %fptr
  %d2 = fpext float %d2.f to double
  call void @sink_double(double %d2)
  %f3.i = load i32, i32* %i32ptr
  %f3 = sitofp i32 %f3.i to float
  call void @sink_float(float %f3)
  %d3.i = load i64, i64* %i64ptr
  %d3 = sitofp i64 %d3.i to double
  call void @sink_double(double %d3)
  %f4.i = load i64, i64* %i64ptr
  %f4 = sitofp i64 %f4.i to float
  call void @sink_float(float %f4)
  %d4.i = load i32, i32* %i32ptr
  %d4 = sitofp i32 %d4.i to double
  call void @sink_double(double %d4)
  ret void
}

declare void @sink_v4f32(<4 x float>)
declare void @sink_v2f64(<2 x double>)
declare void @sink_v16i8(<16 x i8>)
declare void @sink_v8i16(<8 x i16>)
declare void @sink_v4i32(<4 x i32>)
declare void @sink_v2i64(<2 x i64>)

; Test loads of vectors.
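; As with the FP loads above, the X64 (SLH) run masks the pointer with the
; predicate state (orq %rax, <reg>) before each movaps instead of hardening
; the loaded vector value.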
define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind speculative_load_hardening {
; X64-LABEL: test_vec_loads:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq %r9, %r14
; X64-NEXT: movq %r8, %r15
; X64-NEXT: movq %rcx, %r12
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq $-1, %rbp
; X64-NEXT: sarq $63, %rax
; X64-NEXT: orq %rax, %rdi
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v4f32
; X64-NEXT: .Lslh_ret_addr15:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr15, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: orq %rax, %rbx
; X64-NEXT: movaps (%rbx), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v2f64
; X64-NEXT: .Lslh_ret_addr16:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr16, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: orq %rax, %r13
; X64-NEXT: movaps (%r13), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v16i8
; X64-NEXT: .Lslh_ret_addr17:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr17, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: orq %rax, %r12
; X64-NEXT: movaps (%r12), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v8i16
; X64-NEXT: .Lslh_ret_addr18:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr18, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: orq %rax, %r15
; X64-NEXT: movaps (%r15), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v4i32
; X64-NEXT: .Lslh_ret_addr19:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr19, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: orq %rax, %r14
; X64-NEXT: movaps (%r14), %xmm0
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink_v2i64
; X64-NEXT: .Lslh_ret_addr20:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr20, %rcx
; X64-NEXT: cmovneq %rbp, %rax
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_vec_loads:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %r15
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %r13
; X64-LFENCE-NEXT: pushq %r12
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: movq %r9, %r14
; X64-LFENCE-NEXT: movq %r8, %r15
; X64-LFENCE-NEXT: movq %rcx, %r12
; X64-LFENCE-NEXT: movq %rdx, %r13
; X64-LFENCE-NEXT: movq %rsi, %rbx
; X64-LFENCE-NEXT: movaps (%rdi), %xmm0
; X64-LFENCE-NEXT: callq sink_v4f32
; X64-LFENCE-NEXT: movaps (%rbx), %xmm0
; X64-LFENCE-NEXT: callq sink_v2f64
; X64-LFENCE-NEXT: movaps (%r13), %xmm0
; X64-LFENCE-NEXT: callq sink_v16i8
; X64-LFENCE-NEXT: movaps (%r12), %xmm0
; X64-LFENCE-NEXT: callq sink_v8i16
; X64-LFENCE-NEXT: movaps (%r15), %xmm0
; X64-LFENCE-NEXT: callq sink_v4i32
; X64-LFENCE-NEXT: movaps (%r14), %xmm0
; X64-LFENCE-NEXT: callq sink_v2i64
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r13
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: retq
entry:
  %x1 = load <4 x float>, <4 x float>* %v4f32ptr
  call void @sink_v4f32(<4 x float> %x1)
  %x2 = load <2 x double>, <2 x double>* %v2f64ptr
  call void @sink_v2f64(<2 x double> %x2)
  %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
  call void @sink_v16i8(<16 x i8> %x3)
  %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
  call void @sink_v8i16(<8 x i16> %x4)
  %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
  call void @sink_v4i32(<4 x i32> %x5)
  %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
  call void @sink_v2i64(<2 x i64> %x6)
  ret void
}

define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind speculative_load_hardening {
; X64-LABEL: test_deferred_hardening:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq %rsi, %r14
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: movq $-1, %r15
; X64-NEXT: sarq $63, %rax
; X64-NEXT: movl (%rdi), %edi
; X64-NEXT: incl %edi
; X64-NEXT: imull %edx, %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr21:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr21, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: movl (%rbx), %ecx
; X64-NEXT: movl (%r14), %edx
; X64-NEXT: leal 1(%rcx,%rdx), %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr22:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr22, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: movl (%rbx), %edi
; X64-NEXT: shll $7, %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr23:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr23, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: movswl (%rbx), %edi
; X64-NEXT: shrl $7, %edi
; X64-NEXT: notl %edi
; X64-NEXT: orl $-65536, %edi # imm = 0xFFFF0000
; X64-NEXT: orl %eax, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr24:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr24, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: movzwl (%rbx), %ecx
; X64-NEXT: rolw $9, %cx
; X64-NEXT: movswl %cx, %edi
; X64-NEXT: negl %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: callq sink
; X64-NEXT: .Lslh_ret_addr25:
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr25, %rcx
; X64-NEXT: cmovneq %r15, %rax
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: test_deferred_hardening:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %r14
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: movq %rsi, %r14
; X64-LFENCE-NEXT: movq %rdi, %rbx
; X64-LFENCE-NEXT: movl (%rdi), %edi
; X64-LFENCE-NEXT: incl %edi
; X64-LFENCE-NEXT: imull %edx, %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: movl (%rbx), %eax
; X64-LFENCE-NEXT: movl (%r14), %ecx
; X64-LFENCE-NEXT: leal 1(%rax,%rcx), %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: movl (%rbx), %edi
; X64-LFENCE-NEXT: shll $7, %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: movswl (%rbx), %edi
; X64-LFENCE-NEXT: shrl $7, %edi
; X64-LFENCE-NEXT: notl %edi
; X64-LFENCE-NEXT: orl $-65536, %edi # imm = 0xFFFF0000
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: movzwl (%rbx), %eax
; X64-LFENCE-NEXT: rolw $9, %ax
; X64-LFENCE-NEXT: movswl %ax, %edi
; X64-LFENCE-NEXT: negl %edi
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: retq
entry:
  %a1 = load i32, i32* %ptr1
  %a2 = add i32 %a1, 1
  %a3 = mul i32 %a2, %x
  call void @sink(i32 %a3)
  %b1 = load i32, i32* %ptr1
  %b2 = add i32 %b1, 1
  %b3 = load i32, i32* %ptr2
  %b4 = add i32 %b2, %b3
  call void @sink(i32 %b4)
  %c1 = load i32, i32* %ptr1
  %c2 = shl i32 %c1, 7
  call void @sink(i32 %c2)
  %d1 = load i32, i32* %ptr1
  ; Check trunc and integer ops narrower than i32.
  %d2 = trunc i32 %d1 to i16
  %d3 = ashr i16 %d2, 7
  %d4 = zext i16 %d3 to i32
  %d5 = xor i32 %d4, -1
  call void @sink(i32 %d5)
  %e1 = load i32, i32* %ptr1
  %e2 = trunc i32 %e1 to i16
  %e3 = lshr i16 %e2, 7
  %e4 = shl i16 %e2, 9
  %e5 = or i16 %e3, %e4
  %e6 = sext i16 %e5 to i32
  %e7 = sub i32 0, %e6
  call void @sink(i32 %e7)
  ret void
}

; Make sure we don't crash on idempotent atomic operations which have a
; hardcoded reference to RSP+offset.
define void @idempotent_atomic(i32* %x) speculative_load_hardening {
; X64-LABEL: idempotent_atomic:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X64-LFENCE-LABEL: idempotent_atomic:
; X64-LFENCE: # %bb.0:
; X64-LFENCE-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-LFENCE-NEXT: retq
  %tmp = atomicrmw or i32* %x, i32 0 seq_cst
  ret void
}