; Tests basics and corner cases of x86-32 sandboxing, using -Om1 in
; the hope that the output will remain stable.  When packing bundles,
; we try to limit to a few instructions with well known sizes and
; minimal use of registers and stack slots in the lowering sequence.

; XFAIL: filetype=asm
; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --args -Om1 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   -ffunction-sections | FileCheck %s

; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --target=x8664 \
; RUN:   --args -Om1 -allow-externally-defined-symbols \
; RUN:   -ffunction-sections | FileCheck %s --check-prefix X8664

declare void @call_target()
@global_byte = internal global [1 x i8] zeroinitializer
@global_short = internal global [2 x i8] zeroinitializer
@global_int = internal global [4 x i8] zeroinitializer

; A direct call sequence uses the right mask and register-call sequence.
define internal void @test_direct_call() {
entry:
  call void @call_target()
  ret void
}
; CHECK-LABEL: test_direct_call
; CHECK: nop
; CHECK: 1b: {{.*}} call 1c
; CHECK-NEXT: 20:
; X8664-LABEL: test_direct_call
; X8664: push {{.*}} R_X86_64_32S test_direct_call+{{.*}}20
; X8664: jmp {{.*}} call_target

; An indirect call sequence uses the right mask and register-call sequence.
define internal void @test_indirect_call(i32 %target) {
entry:
  %__1 = inttoptr i32 %target to void ()*
  call void %__1()
  ret void
}
; CHECK-LABEL: test_indirect_call
; CHECK: mov [[REG:.*]],DWORD PTR [esp
; CHECK-NEXT: nop
; CHECK: 1b: {{.*}} and [[REG]],0xffffffe0
; CHECK-NEXT: call [[REG]]
; CHECK-NEXT: 20:
; X8664-LABEL: test_indirect_call
; X8664: push {{.*}} R_X86_64_32S test_indirect_call+{{.*}}20
; X8664: {{.*}} and e[[REG:..]],0xffffffe0
; X8664: add r[[REG]],r15
; X8664: jmp r[[REG]]

; A return sequence uses the right pop / mask / jmp sequence.
define internal void @test_ret() {
entry:
  ret void
}
; CHECK-LABEL: test_ret
; CHECK: pop ecx
; CHECK-NEXT: and ecx,0xffffffe0
; CHECK-NEXT: jmp ecx
; X8664-LABEL: test_ret
; X8664: pop rcx
; X8664: and ecx,0xffffffe0
; X8664: add rcx,r15
; X8664: jmp rcx

; A perfectly packed bundle should not have nops at the end.
define internal void @packed_bundle() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; bundle boundary
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: packed_bundle
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov WORD PTR
; CHECK-NEXT: 30: {{.*}} mov BYTE PTR
; CHECK-NEXT: 37: {{.*}} mov WORD PTR
; CHECK-NEXT: 40: {{.*}} mov BYTE PTR
; CHECK-NEXT: 47: {{.*}} mov WORD PTR

; An imperfectly packed bundle should have one or more nops at the end.
define internal void @nonpacked_bundle() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; nop padding
  ; bundle boundary
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: nonpacked_bundle
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov WORD PTR
; CHECK-NEXT: 29: {{.*}} mov WORD PTR
; CHECK-NEXT: 32: {{.*}} mov WORD PTR
; CHECK-NEXT: 3b: {{.*}} nop
; CHECK: 40: {{.*}} mov WORD PTR

; A zero-byte instruction (e.g. local label definition) at a bundle
; boundary should not trigger nop padding.
define internal void @label_at_boundary(i32 %arg, float %farg1, float %farg2) {
entry:
  %argi8 = trunc i32 %arg to i8
  call void @call_target()
  ; bundle boundary
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  %addr_int = bitcast [4 x i8]* @global_int to i32*
  store i32 0, i32* %addr_int, align 1     ; 10-byte instruction
  %blah = select i1 true, i8 %argi8, i8 %argi8 ; 22-byte lowering sequence
  ; label is here
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: label_at_boundary
; CHECK: call
; We rely on a particular 7-instruction 22-byte Om1 lowering sequence
; for select.
; CHECK-NEXT: 20: {{.*}} mov DWORD PTR
; CHECK-NEXT: 2a: {{.*}} mov {{.*}},0x1
; CHECK-NEXT: 2c: {{.*}} cmp {{.*}},0x0
; CHECK-NEXT: 2e: {{.*}} mov {{.*}},BYTE PTR
; CHECK-NEXT: 32: {{.*}} mov BYTE PTR
; CHECK-NEXT: 36: {{.*}} jne 40
; CHECK-NEXT: 38: {{.*}} mov {{.*}},BYTE PTR
; CHECK-NEXT: 3c: {{.*}} mov BYTE PTR
; CHECK-NEXT: 40: {{.*}} mov WORD PTR

; Bundle lock without padding.
define internal void @bundle_lock_without_padding() {
entry:
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: bundle_lock_without_padding
; CHECK: mov WORD PTR
; CHECK-NEXT: pop ecx
; CHECK-NEXT: and ecx,0xffffffe0
; CHECK-NEXT: jmp ecx

; Bundle lock with padding.
define internal void @bundle_lock_with_padding() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
  ; 3 bytes to restore stack pointer
  ; 1 byte to pop ecx
  ; bundle_lock
  ; 3 bytes to mask ecx
  ; This is now 32 bytes from the beginning of the bundle, so
  ; a 3-byte nop will need to be emitted before the bundle_lock.
  ; 2 bytes to jump to ecx
  ; bundle_unlock
}
; CHECK-LABEL: bundle_lock_with_padding
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov WORD PTR
; CHECK-NEXT: 30: {{.*}} mov WORD PTR
; CHECK-NEXT: 39: {{.*}} add esp,
; CHECK-NEXT: 3c: {{.*}} pop ecx
; CHECK-NEXT: 3d: {{.*}} nop
; CHECK-NEXT: 40: {{.*}} and ecx,0xffffffe0
; CHECK-NEXT: 43: {{.*}} jmp ecx

; Bundle lock align_to_end without any padding.
define internal void @bundle_lock_align_to_end_padding_0() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  call void @call_target()                 ; 5-byte instruction
  ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_0
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov WORD PTR
; CHECK-NEXT: 29: {{.*}} mov WORD PTR
; CHECK-NEXT: 32: {{.*}} mov WORD PTR
; CHECK-NEXT: 3b: {{.*}} call

; Bundle lock align_to_end with one bunch of padding.
define internal void @bundle_lock_align_to_end_padding_1() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  call void @call_target()                 ; 5-byte instruction
  ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_1
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov BYTE PTR
; CHECK-NEXT: 2e: {{.*}} mov BYTE PTR
; CHECK-NEXT: 35: {{.*}} nop
; CHECK: 3b: {{.*}} call

; Bundle lock align_to_end with two bunches of padding.
define internal void @bundle_lock_align_to_end_padding_2(i32 %target) {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  %__1 = inttoptr i32 %target to void ()*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  call void %__1()
  ; 4 bytes to load %target into a register
  ; bundle_lock align_to_end
  ; 3 bytes to mask the register
  ; This is now 32 bytes from the beginning of the bundle, so
  ; a 3-byte nop will need to be emitted before the bundle_lock,
  ; followed by a 27-byte nop before the mask/jump.
  ; 2 bytes to jump to the register
  ; bundle_unlock
  ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_2
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov WORD PTR
; CHECK-NEXT: 30: {{.*}} mov WORD PTR
; CHECK-NEXT: 39: {{.*}} mov [[REG:.*]],DWORD PTR [esp
; CHECK-NEXT: 3d: {{.*}} nop
; CHECK: 40: {{.*}} nop
; CHECK: 5b: {{.*}} and [[REG]],0xffffffe0
; CHECK-NEXT: 5e: {{.*}} call [[REG]]

; Tests the pad_to_end bundle alignment with no padding bytes needed.
define internal void @bundle_lock_pad_to_end_padding_0(i32 %arg0, i32 %arg1,
                                                       i32 %arg3, i32 %arg4,
                                                       i32 %arg5, i32 %arg6) {
  call void @call_target()
  ; bundle boundary
  %x = add i32 %arg5, %arg6 ; 12 bytes
  %y = trunc i32 %x to i16  ; 10 bytes
  call void @call_target()  ; 10 bytes
  ; bundle boundary
  ret void
}
; X8664: 56: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_0+{{.*}}60
; X8664: 5b: {{.*}} jmp {{.*}} call_target
; X8664: 60: {{.*}} add

; Tests the pad_to_end bundle alignment with 11 padding bytes needed, and some
; instructions before the call.
define internal void @bundle_lock_pad_to_end_padding_11(i32 %arg0, i32 %arg1,
                                                        i32 %arg3, i32 %arg4,
                                                        i32 %arg5, i32 %arg6) {
  call void @call_target()
  ; bundle boundary
  %x = add i32 %arg5, %arg6 ; 11 bytes
  call void @call_target()  ; 10 bytes
  ; 11 bytes of nop
  ; bundle boundary
  ret void
}
; X8664: 4b: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_11+{{.*}}60
; X8664: 50: {{.*}} jmp {{.*}} call_target
; X8664: 55: {{.*}} nop
; X8664: 5d: {{.*}} nop
; X8664: 60: {{.*}} add

; Tests the pad_to_end bundle alignment with 22 padding bytes needed, and no
; instructions before the call.
define internal void @bundle_lock_pad_to_end_padding_22(i32 %arg0, i32 %arg1,
                                                        i32 %arg3, i32 %arg4,
                                                        i32 %arg5, i32 %arg6) {
  call void @call_target()
  ; bundle boundary
  call void @call_target() ; 10 bytes
  ; 22 bytes of nop
  ; bundle boundary
  ret void
}
; X8664: 40: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_22+{{.*}}60
; X8664: 45: {{.*}} jmp {{.*}} call_target
; X8664: 4a: {{.*}} nop
; X8664: 52: {{.*}} nop
; X8664: 5a: {{.*}} nop
; X8664: 60: {{.*}} add

; Stack adjustment state during an argument push sequence gets
; properly checkpointed and restored during the two passes, as
; observed by the stack adjustment for accessing stack-allocated
; variables.
define internal void @checkpoint_restore_stack_adjustment(i32 %arg) {
entry:
  call void @call_target()
  ; bundle boundary
  call void @checkpoint_restore_stack_adjustment(i32 %arg)
  ret void
}
; CHECK-LABEL: checkpoint_restore_stack_adjustment
; CHECK: sub esp,0x1c
; CHECK: call
; The address of %arg should be [esp+0x20], not [esp+0x30].
; CHECK-NEXT: mov [[REG:.*]],DWORD PTR [esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp],[[REG]]
; CHECK: call
; CHECK: add esp,0x1c