• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This tests the NaCl intrinsics not related to atomic operations.
2
3; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
4; RUN:   --target x8632 --sandbox -i %s --args -O2 \
5; RUN:   -allow-externally-defined-symbols \
6; RUN:   | %if --need=target_X8632 --command FileCheck %s
7; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
8; RUN:   --target x8632 --sandbox -i %s --args -Om1 \
9; RUN:   -allow-externally-defined-symbols \
10; RUN:   | %if --need=target_X8632 --command FileCheck %s
11
12; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1
13; share the same "CHECK" prefix). This separate run helps check that
14; some code is optimized out.
15; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
16; RUN:   --target x8632 --sandbox -i %s --args -O2 \
17; RUN:   -allow-externally-defined-symbols \
18; RUN:   | %if --need=target_X8632 \
19; RUN:   --command FileCheck --check-prefix=CHECKO2REM %s
20
21; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets
22; lowered to __nacl_read_tp instead of gs:0x0.
23; We also know that because it's O2, it'll have the O2REM optimizations.
24; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
25; RUN:   --target x8632 -i %s --args -O2 \
26; RUN:   -allow-externally-defined-symbols \
27; RUN:   | %if --need=target_X8632 \
28; RUN:   --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s
29
30; RUN: %if --need=target_ARM32 \
31; RUN:   --command %p2i --filetype=obj --disassemble --target arm32 \
32; RUN:   -i %s --args -O2 \
33; RUN:   -allow-externally-defined-symbols \
34; RUN:   | %if --need=target_ARM32 \
35; RUN:   --command FileCheck --check-prefix ARM32 %s
36
37; RUN: %if --need=target_MIPS32 --need=allow_dump \
38; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
39; RUN:   -i %s --args -Om1 --skip-unimplemented \
40; RUN:   -allow-externally-defined-symbols \
41; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
42; RUN:   --command FileCheck --check-prefix MIPS32 %s
43
44declare i8* @llvm.nacl.read.tp()
45declare void @llvm.nacl.longjmp(i8*, i32)
46declare i32 @llvm.nacl.setjmp(i8*)
47declare float @llvm.sqrt.f32(float)
48declare double @llvm.sqrt.f64(double)
49declare float @llvm.fabs.f32(float)
50declare double @llvm.fabs.f64(double)
51declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
52declare void @llvm.trap()
53declare i16 @llvm.bswap.i16(i16)
54declare i32 @llvm.bswap.i32(i32)
55declare i64 @llvm.bswap.i64(i64)
56declare i32 @llvm.ctlz.i32(i32, i1)
57declare i64 @llvm.ctlz.i64(i64, i1)
58declare i32 @llvm.cttz.i32(i32, i1)
59declare i64 @llvm.cttz.i64(i64, i1)
60declare i32 @llvm.ctpop.i32(i32)
61declare i64 @llvm.ctpop.i64(i64)
62declare i8* @llvm.stacksave()
63declare void @llvm.stackrestore(i8*)
64
; Calls llvm.nacl.read.tp once and returns the resulting pointer converted to
; i32, so the value produced by the intrinsic is used by the return.
65define internal i32 @test_nacl_read_tp() {
66entry:
67  %ptr = call i8* @llvm.nacl.read.tp()
68  %__1 = ptrtoint i8* %ptr to i32
69  ret i32 %__1
70}
71; CHECK-LABEL: test_nacl_read_tp
72; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
73; CHECKO2REM-LABEL: test_nacl_read_tp
74; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
75; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp
76; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
77; MIPS32-LABEL: test_nacl_read_tp
78; MIPS32: jal {{.*}} __nacl_read_tp
79
; Calls llvm.nacl.read.tp twice and uses each result in address arithmetic:
; the first (added to itself) is the address of a load, the second (plus 4)
; is the address of a store. Returns the loaded value %v.
80define internal i32 @test_nacl_read_tp_more_addressing() {
81entry:
82  %ptr = call i8* @llvm.nacl.read.tp()
83  %__1 = ptrtoint i8* %ptr to i32
84  %x = add i32 %__1, %__1
85  %__3 = inttoptr i32 %x to i32*
86  %v = load i32, i32* %__3, align 1
  ; %v_add has no users in this function.
87  %v_add = add i32 %v, 1
88
89  %ptr2 = call i8* @llvm.nacl.read.tp()
90  %__6 = ptrtoint i8* %ptr2 to i32
91  %y = add i32 %__6, 4
92  %__8 = inttoptr i32 %y to i32*
93  %v_add2 = add i32 %v, 4
94  store i32 %v_add2, i32* %__8, align 1
95  ret i32 %v
96}
97; CHECK-LABEL: test_nacl_read_tp_more_addressing
98; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
99; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
100; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing
101; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
102; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
103; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing
104; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
105; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
106; MIPS32-LABEL: test_nacl_read_tp_more_addressing
107; MIPS32: jal {{.*}} __nacl_read_tp
108
; Calls llvm.nacl.read.tp but never uses the result; the function simply
; returns its argument %a.
109define internal i32 @test_nacl_read_tp_dead(i32 %a) {
110entry:
111  %ptr = call i8* @llvm.nacl.read.tp()
112  ; Not actually using the result of nacl read tp call.
113  ; In O2 mode this should be DCE'ed.
114  ret i32 %a
115}
116; Consider nacl.read.tp side-effect free, so it can be eliminated.
; The negative pattern for the sandboxed O2 run spells the operands the same
; way as the positive gs:0x0 patterns earlier in this file (no space after the
; comma, optional "DWORD PTR "), so it fires if the load is ever emitted.
117; CHECKO2REM-LABEL: test_nacl_read_tp_dead
118; CHECKO2REM-NOT: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
119; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead
120; CHECKO2UNSANDBOXEDREM-NOT: call {{.*}} R_{{.*}} __nacl_read_tp
121; MIPS32-LABEL: test_nacl_read_tp_dead
122; MIPS32: jal {{.*}} __nacl_read_tp
123
; Converts %iptr_env to an i8* and calls llvm.nacl.setjmp on it. When setjmp
; returns 0, control goes to Zero, which calls llvm.nacl.longjmp on the same
; address with value 1 (followed by ret i32 0); otherwise NonZero returns 1.
124define internal i32 @test_setjmplongjmp(i32 %iptr_env) {
125entry:
126  %env = inttoptr i32 %iptr_env to i8*
127  %i = call i32 @llvm.nacl.setjmp(i8* %env)
128  %r1 = icmp eq i32 %i, 0
129  br i1 %r1, label %Zero, label %NonZero
130Zero:
131  ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy.
132  %env2 = inttoptr i32 %iptr_env to i8*
133  call void @llvm.nacl.longjmp(i8* %env2, i32 1)
134  ret i32 0
135NonZero:
136  ret i32 1
137}
138; CHECK-LABEL: test_setjmplongjmp
139; CHECK: call {{.*}} R_{{.*}} setjmp
140; CHECK: call {{.*}} R_{{.*}} longjmp
141; CHECKO2REM-LABEL: test_setjmplongjmp
142; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
143; CHECKO2REM: call {{.*}} R_{{.*}} longjmp
144; ARM32-LABEL: test_setjmplongjmp
145; ARM32: bl {{.*}} setjmp
146; ARM32: bl {{.*}} longjmp
147; MIPS32-LABEL: test_setjmplongjmp
148; MIPS32: jal {{.*}} setjmp
149; MIPS32: jal {{.*}} longjmp
150
; Calls llvm.nacl.setjmp on %iptr_env (as i8*) and ignores the result %i;
; the function returns %i_other instead.
151define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
152entry:
153  %env = inttoptr i32 %iptr_env to i8*
154  %i = call i32 @llvm.nacl.setjmp(i8* %env)
155  ret i32 %i_other
156}
157; Don't consider setjmp side-effect free, so it's not eliminated if
158; result unused.
159; CHECKO2REM-LABEL: test_setjmp_unused
160; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
161; MIPS32-LABEL: test_setjmp_unused
162; MIPS32: jal {{.*}} setjmp
163
; Three llvm.sqrt.f32 calls: on the argument %x, on the result of that call,
; and on the constant -0.0. Returns the sum of the second and third results.
; The %iptr parameter is not referenced in the body.
164define internal float @test_sqrt_float(float %x, i32 %iptr) {
165entry:
166  %r = call float @llvm.sqrt.f32(float %x)
167  %r2 = call float @llvm.sqrt.f32(float %r)
168  %r3 = call float @llvm.sqrt.f32(float -0.0)
169  %r4 = fadd float %r2, %r3
170  ret float %r4
171}
172; CHECK-LABEL: test_sqrt_float
173; CHECK: sqrtss xmm{{.*}}
174; CHECK: sqrtss xmm{{.*}}
175; CHECK: sqrtss xmm{{.*}},DWORD PTR
176; ARM32-LABEL: test_sqrt_float
177; ARM32: vsqrt.f32
178; ARM32: vsqrt.f32
179; ARM32: vsqrt.f32
180; ARM32: vadd.f32
181; MIPS32-LABEL: test_sqrt_float
182; MIPS32: sqrt.s
183; MIPS32: sqrt.s
184; MIPS32: sqrt.s
185; MIPS32: add.s
186
; Loads a float from the address given by %iptr, takes llvm.sqrt.f32 of the
; loaded value, and returns %x plus that square root. The load's only user is
; the sqrt call.
187define internal float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) {
188entry:
189  %__2 = inttoptr i32 %iptr to float*
190  %y = load float, float* %__2, align 4
191  %r5 = call float @llvm.sqrt.f32(float %y)
192  %r6 = fadd float %x, %r5
193  ret float %r6
194}
195; CHECK-LABEL: test_sqrt_float_mergeable_load
196; We could fold the load and the sqrt into one operation, but the
197; current folding only handles load + arithmetic op. The sqrt inst
198; is considered an intrinsic call and not an arithmetic op.
199; CHECK: sqrtss xmm{{.*}}
200; ARM32-LABEL: test_sqrt_float_mergeable_load
201; ARM32: vldr s{{.*}}
202; ARM32: vsqrt.f32
203
; Three llvm.sqrt.f64 calls: on the argument %x, on the result of that call,
; and on the constant -0.0. Returns the sum of the second and third results.
; The %iptr parameter is not referenced in the body.
204define internal double @test_sqrt_double(double %x, i32 %iptr) {
205entry:
206  %r = call double @llvm.sqrt.f64(double %x)
207  %r2 = call double @llvm.sqrt.f64(double %r)
208  %r3 = call double @llvm.sqrt.f64(double -0.0)
209  %r4 = fadd double %r2, %r3
210  ret double %r4
211}
212; CHECK-LABEL: test_sqrt_double
213; CHECK: sqrtsd xmm{{.*}}
214; CHECK: sqrtsd xmm{{.*}}
215; CHECK: sqrtsd xmm{{.*}},QWORD PTR
216; ARM32-LABEL: test_sqrt_double
217; ARM32: vsqrt.f64
218; ARM32: vsqrt.f64
219; ARM32: vsqrt.f64
220; ARM32: vadd.f64
221; MIPS32-LABEL: test_sqrt_double
222; MIPS32: sqrt.d
223; MIPS32: sqrt.d
224; MIPS32: sqrt.d
225; MIPS32: add.d
226
; Loads a double from the address given by %iptr, takes llvm.sqrt.f64 of the
; loaded value, and returns %x plus that square root. The load's only user is
; the sqrt call.
227define internal double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) {
228entry:
229  %__2 = inttoptr i32 %iptr to double*
230  %y = load double, double* %__2, align 8
231  %r5 = call double @llvm.sqrt.f64(double %y)
232  %r6 = fadd double %x, %r5
233  ret double %r6
234}
235; CHECK-LABEL: test_sqrt_double_mergeable_load
236; CHECK: sqrtsd xmm{{.*}}
237; ARM32-LABEL: test_sqrt_double_mergeable_load
238; ARM32: vldr d{{.*}}
239; ARM32: vsqrt.f64
240
; Calls llvm.sqrt.f32 on %x and llvm.sqrt.f64 on %y but uses neither result;
; the function returns the constant 0.0.
241define internal float @test_sqrt_ignored(float %x, double %y) {
242entry:
243  %ignored1 = call float @llvm.sqrt.f32(float %x)
244  %ignored2 = call double @llvm.sqrt.f64(double %y)
245  ret float 0.0
246}
247; CHECKO2REM-LABEL: test_sqrt_ignored
248; CHECKO2REM-NOT: sqrtss
249; CHECKO2REM-NOT: sqrtsd
250; MIPS32-LABEL: test_sqrt_ignored
251; MIPS32: sqrt.s
252; MIPS32: sqrt.d
253
; Three llvm.fabs.f32 calls: on the argument %x, on the result of that call,
; and on the constant -0.0. Returns the sum of the second and third results.
254define internal float @test_fabs_float(float %x) {
255entry:
256  %r = call float @llvm.fabs.f32(float %x)
257  %r2 = call float @llvm.fabs.f32(float %r)
258  %r3 = call float @llvm.fabs.f32(float -0.0)
259  %r4 = fadd float %r2, %r3
260  ret float %r4
261}
262;;; Specially check that the pand instruction doesn't try to operate on a 32-bit
263;;; (f32) memory operand, and instead uses two xmm registers.
264; CHECK-LABEL: test_fabs_float
265; CHECK: pcmpeqd
266; CHECK: psrld
267; CHECK: pand {{.*}}xmm{{.*}}xmm
268; CHECK: pcmpeqd
269; CHECK: psrld
270; CHECK: pand {{.*}}xmm{{.*}}xmm
271; CHECK: pcmpeqd
272; CHECK: psrld
273; CHECK: pand {{.*}}xmm{{.*}}xmm
274; MIPS32-LABEL: test_fabs_float
275; MIPS32: abs.s
276; MIPS32: abs.s
277; MIPS32: abs.s
278; MIPS32: add.s
279
; Three llvm.fabs.f64 calls: on the argument %x, on the result of that call,
; and on the constant -0.0. Returns the sum of the second and third results.
280define internal double @test_fabs_double(double %x) {
281entry:
282  %r = call double @llvm.fabs.f64(double %x)
283  %r2 = call double @llvm.fabs.f64(double %r)
284  %r3 = call double @llvm.fabs.f64(double -0.0)
285  %r4 = fadd double %r2, %r3
286  ret double %r4
287}
288;;; Specially check that the pand instruction doesn't try to operate on a 64-bit
289;;; (f64) memory operand, and instead uses two xmm registers.
290; CHECK-LABEL: test_fabs_double
291; CHECK: pcmpeqd
292; CHECK: psrlq
293; CHECK: pand {{.*}}xmm{{.*}}xmm
294; CHECK: pcmpeqd
295; CHECK: psrlq
296; CHECK: pand {{.*}}xmm{{.*}}xmm
297; CHECK: pcmpeqd
298; CHECK: psrlq
299; CHECK: pand {{.*}}xmm{{.*}}xmm
300; MIPS32-LABEL: test_fabs_double
301; MIPS32: abs.d
302; MIPS32: abs.d
303; MIPS32: abs.d
304; MIPS32: add.d
305
; Vector variant of the fabs tests: llvm.fabs.v4f32 is applied to %x, to the
; result of that call, and to an undef vector. Returns the element-wise sum
; of the second and third results.
306define internal <4 x float> @test_fabs_v4f32(<4 x float> %x) {
307entry:
308  %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
309  %r2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %r)
310  %r3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
311  %r4 = fadd <4 x float> %r2, %r3
312  ret <4 x float> %r4
313}
314; CHECK-LABEL: test_fabs_v4f32
315; CHECK: pcmpeqd
316; CHECK: psrld
317; CHECK: pand
318; CHECK: pcmpeqd
319; CHECK: psrld
320; CHECK: pand
321; CHECK: pcmpeqd
322; CHECK: psrld
323; CHECK: pand
324
; Branches on %br == 0: the Zero block calls llvm.trap and ends in
; unreachable; the NonZero block returns 1.
325define internal i32 @test_trap(i32 %br) {
326entry:
327  %r1 = icmp eq i32 %br, 0
328  br i1 %r1, label %Zero, label %NonZero
329Zero:
330  call void @llvm.trap()
331  unreachable
332NonZero:
333  ret i32 1
334}
335; CHECK-LABEL: test_trap
336; CHECK: ud2
337; ARM32-LABEL: test_trap
338; ARM32: udf
339; MIPS32-LABEL: test_trap
340; MIPS32: teq zero,zero
341
; Truncates %x to i16, applies llvm.bswap.i16, and returns the swapped value
; zero-extended back to i32.
342define internal i32 @test_bswap_16(i32 %x) {
343entry:
344  %x_trunc = trunc i32 %x to i16
345  %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
346  %r_zext = zext i16 %r to i32
347  ret i32 %r_zext
348}
349; CHECK-LABEL: test_bswap_16
350; Make sure this is the right operand size so that the most significant bit
351; to least significant bit rotation happens at the right boundary.
352; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8
353; ARM32-LABEL: test_bswap_16
354; ARM32: rev
355; ARM32: lsr {{.*}} #16
356; MIPS32-LABEL: test_bswap_16
357; MIPS32: sll {{.*}},0x8
358; MIPS32: lui {{.*}},0xff
359; MIPS32: and
360; MIPS32: sll {{.*}},0x18
361; MIPS32: or
362; MIPS32: srl {{.*}},0x10
363; MIPS32: andi {{.*}},0xffff
364
; Returns llvm.bswap.i32 applied to the argument %x.
365define internal i32 @test_bswap_32(i32 %x) {
366entry:
367  %r = call i32 @llvm.bswap.i32(i32 %x)
368  ret i32 %r
369}
370; CHECK-LABEL: test_bswap_32
371; CHECK: bswap e{{.*}}
372; ARM32-LABEL: test_bswap_32
373; ARM32: rev
374; MIPS32-LABEL: test_bswap_32
375; MIPS32: srl {{.*}},0x18
376; MIPS32: srl {{.*}},0x8
377; MIPS32: andi {{.*}},0xff00
378; MIPS32: or
379; MIPS32: sll {{.*}},0x8
380; MIPS32: lui {{.*}},0xff
381; MIPS32: and
382; MIPS32: sll {{.*}},0x18
383; MIPS32: or
384; MIPS32: or
385
; Returns llvm.bswap.i64 applied to the argument %x.
386define internal i64 @test_bswap_64(i64 %x) {
387entry:
388  %r = call i64 @llvm.bswap.i64(i64 %x)
389  ret i64 %r
390}
391; CHECK-LABEL: test_bswap_64
392; CHECK: bswap e{{.*}}
393; CHECK: bswap e{{.*}}
394; ARM32-LABEL: test_bswap_64
395; ARM32: rev
396; ARM32: rev
397; MIPS32-LABEL: test_bswap_64
398; MIPS32: sll {{.*}},0x8
399; MIPS32: srl {{.*}},0x18
400; MIPS32: srl {{.*}},0x8
401; MIPS32: andi {{.*}},0xff00
402; MIPS32: lui {{.*}},0xff
403; MIPS32: or
404; MIPS32: and
405; MIPS32: sll {{.*}},0x18
406; MIPS32: or
407; MIPS32: srl {{.*}},0x18
408; MIPS32: srl {{.*}},0x8
409; MIPS32: andi {{.*}},0xff00
410; MIPS32: or
411; MIPS32: or
412; MIPS32: sll {{.*}},0x8
413; MIPS32: and
414; MIPS32: sll {{.*}},0x18
415; MIPS32: or
416; MIPS32: or
417
; Same as the 64-bit bswap test, but the intrinsic's operand is undef rather
; than a function argument.
418define internal i64 @test_bswap_64_undef() {
419entry:
420  %r = call i64 @llvm.bswap.i64(i64 undef)
421  ret i64 %r
422}
423; CHECK-LABEL: test_bswap_64_undef
424; CHECK: bswap e{{.*}}
425; CHECK: bswap e{{.*}}
426; ARM32-LABEL: test_bswap_64_undef
427; ARM32: rev
428; ARM32: rev
429; MIPS32-LABEL: test_bswap_64_undef
430; MIPS32: sll {{.*}},0x8
431; MIPS32: srl {{.*}},0x18
432; MIPS32: srl {{.*}},0x8
433; MIPS32: andi {{.*}},0xff00
434; MIPS32: lui {{.*}},0xff
435; MIPS32: or
436; MIPS32: and
437; MIPS32: sll {{.*}},0x18
438; MIPS32: or
439; MIPS32: srl {{.*}},0x18
440; MIPS32: srl {{.*}},0x8
441; MIPS32: andi {{.*}},0xff00
442; MIPS32: or
443; MIPS32: or
444; MIPS32: sll {{.*}},0x8
445; MIPS32: and
446; MIPS32: sll {{.*}},0x18
447; MIPS32: or
448; MIPS32: or
449
; Returns llvm.ctlz.i32 of the argument %x, with the intrinsic's i1 flag
; passed as false.
450define internal i32 @test_ctlz_32(i32 %x) {
451entry:
452  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
453  ret i32 %r
454}
455; CHECK-LABEL: test_ctlz_32
456; TODO(jvoung): If we detect that LZCNT is supported, then use that
457; and avoid the need to do the cmovne and xor stuff to guarantee that
458; the result is well-defined w/ input == 0.
459; CHECK: bsr [[REG_TMP:e.*]],{{.*}}
460; CHECK: mov [[REG_RES:e.*]],0x3f
461; CHECK: cmovne [[REG_RES]],[[REG_TMP]]
462; CHECK: xor [[REG_RES]],0x1f
463; ARM32-LABEL: test_ctlz_32
464; ARM32: clz
465; MIPS32-LABEL: test_ctlz_32
466; MIPS32: clz
467
; Returns llvm.ctlz.i32 of the constant 123456 (i1 flag false); the operand
; is an immediate rather than a function argument.
468define internal i32 @test_ctlz_32_const() {
469entry:
470  %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false)
471  ret i32 %r
472}
473; Could potentially constant fold this, but the front-end should have done that.
474; The dest operand must be a register and the source operand must be a register
475; or memory.
476; CHECK-LABEL: test_ctlz_32_const
477; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
478; ARM32-LABEL: test_ctlz_32_const
479; ARM32: clz
480; MIPS32-LABEL: test_ctlz_32_const
481; MIPS32: clz
482
; Calls llvm.ctlz.i32 on %x but never uses the result; returns the
; constant 1.
483define internal i32 @test_ctlz_32_ignored(i32 %x) {
484entry:
485  %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
486  ret i32 1
487}
488; CHECKO2REM-LABEL: test_ctlz_32_ignored
489; CHECKO2REM-NOT: bsr
490
; Returns llvm.ctlz.i64 of the 64-bit argument %x, with the intrinsic's i1
; flag passed as false.
491define internal i64 @test_ctlz_64(i64 %x) {
492entry:
493  %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
494  ret i64 %r
495}
496; CHECKO2REM-LABEL: test_ctlz_64
497; CHECK-LABEL: test_ctlz_64
498; CHECK: bsr [[REG_TMP1:e.*]],{{.*}}
499; CHECK: mov [[REG_RES1:e.*]],0x3f
500; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]]
501; CHECK: xor [[REG_RES1]],0x1f
502; CHECK: add [[REG_RES1]],0x20
503; CHECK: bsr [[REG_RES2:e.*]],{{.*}}
504; CHECK: xor [[REG_RES2]],0x1f
505; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]]
506; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
507; CHECK: mov {{.*}},0x0
508; ARM32-LABEL: test_ctlz_64
509; ARM32: clz
510; ARM32: cmp {{.*}}, #0
511; ARM32: add {{.*}}, #32
512; ARM32: clzne
513; ARM32: mov {{.*}}, #0
514; MIPS32-LABEL: test_ctlz_64
515; MIPS32: clz
516; MIPS32: clz
517; MIPS32: addiu
518; MIPS32: movn
519; MIPS32: addiu
520
; Calls llvm.ctlz.i64 on the constant 123456789012 (i1 flag false) and
; returns the result truncated to i32. The %x parameter is not referenced in
; the body.
521define internal i32 @test_ctlz_64_const(i64 %x) {
522entry:
523  %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false)
524  %r2 = trunc i64 %r to i32
525  ret i32 %r2
526}
527; CHECK-LABEL: test_ctlz_64_const
528; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
529; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
530; ARM32-LABEL: test_ctlz_64_const
531; ARM32: clz
532; ARM32: clzne
533; MIPS32-LABEL: test_ctlz_64_const
534; MIPS32: clz
535; MIPS32: clz
536; MIPS32: addiu
537; MIPS32: movn
538; MIPS32: addiu
539
; Calls llvm.ctlz.i64 on the constant 1234567890 but never uses the result;
; returns the constant 2. The %x parameter is not referenced in the body.
540define internal i32 @test_ctlz_64_ignored(i64 %x) {
541entry:
542  %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false)
543  ret i32 2
544}
545; CHECKO2REM-LABEL: test_ctlz_64_ignored
546; CHECKO2REM-NOT: bsr
547
; Returns llvm.cttz.i32 of the argument %x, with the intrinsic's i1 flag
; passed as false.
548define internal i32 @test_cttz_32(i32 %x) {
549entry:
550  %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
551  ret i32 %r
552}
553; CHECK-LABEL: test_cttz_32
554; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
555; CHECK: mov [[REG_IF_ZERO:e.*]],0x20
556; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]]
557; ARM32-LABEL: test_cttz_32
558; ARM32: rbit
559; ARM32: clz
560; MIPS32-LABEL: test_cttz_32
561; MIPS32: addiu
562; MIPS32: nor
563; MIPS32: and
564; MIPS32: clz
565; MIPS32: li
566; MIPS32: subu
567
; Returns llvm.cttz.i64 of the 64-bit argument %x, with the intrinsic's i1
; flag passed as false.
568define internal i64 @test_cttz_64(i64 %x) {
569entry:
570  %r = call i64 @llvm.cttz.i64(i64 %x, i1 false)
571  ret i64 %r
572}
573; CHECK-LABEL: test_cttz_64
574; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
575; CHECK: mov [[REG_RES1:e.*]],0x20
576; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]]
577; CHECK: add [[REG_RES1]],0x20
578; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]]
579; CHECK: test [[REG_LOWER]],[[REG_LOWER]]
580; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
581; CHECK: mov {{.*}},0x0
582; ARM32-LABEL: test_cttz_64
583; ARM32: rbit
584; ARM32: rbit
585; ARM32: clz
586; ARM32: cmp {{.*}}, #0
587; ARM32: add {{.*}}, #32
588; ARM32: clzne
589; ARM32: mov {{.*}}, #0
590; MIPS32-LABEL: test_cttz_64
591; MIPS32: addiu
592; MIPS32: nor
593; MIPS32: and
594; MIPS32: clz
595; MIPS32: li
596; MIPS32: subu
597; MIPS32: addiu
598; MIPS32: nor
599; MIPS32: and
600; MIPS32: clz
601; MIPS32: li
602; MIPS32: subu
603
; Returns llvm.ctpop.i32 of the argument %x.
604define internal i32 @test_popcount_32(i32 %x) {
605entry:
606  %r = call i32 @llvm.ctpop.i32(i32 %x)
607  ret i32 %r
608}
609; CHECK-LABEL: test_popcount_32
610; CHECK: call {{.*}} R_{{.*}} __popcountsi2
611; ARM32-LABEL: test_popcount_32
612; ARM32: bl {{.*}} __popcountsi2
613; MIPS32-LABEL: test_popcount_32
614; MIPS32: jal {{.*}} __popcountsi2
615
; Returns the full i64 result of llvm.ctpop.i64 applied to the argument %x.
616define internal i64 @test_popcount_64(i64 %x) {
617entry:
618  %r = call i64 @llvm.ctpop.i64(i64 %x)
619  ret i64 %r
620}
621; CHECK-LABEL: test_popcount_64
622; CHECK: call {{.*}} R_{{.*}} __popcountdi2
623; __popcountdi2 only returns a 32-bit result, so clear the upper bits of
624; the return value just in case.
625; CHECK: mov {{.*}},0x0
626; ARM32-LABEL: test_popcount_64
627; ARM32: bl {{.*}} __popcountdi2
628; ARM32: mov {{.*}}, #0
629; MIPS32-LABEL: test_popcount_64
630; MIPS32: jal {{.*}} __popcountdi2
631
; Calls llvm.ctpop.i64 on %x and returns only the low 32 bits (trunc) of the
; 64-bit result.
632define internal i32 @test_popcount_64_ret_i32(i64 %x) {
633entry:
634  %r_i64 = call i64 @llvm.ctpop.i64(i64 %x)
635  %r = trunc i64 %r_i64 to i32
636  ret i32 %r
637}
638; If there is a trunc, then the mov {{.*}},0x0 is dead and gets optimized out.
; The negative pattern is spelled exactly like the positive upper-half-clearing
; pattern used for test_popcount_64 (no space after the comma, 0x0 immediate),
; so it fires if the dead mov is still emitted.
639; CHECKO2REM-LABEL: test_popcount_64_ret_i32
640; CHECKO2REM: call {{.*}} R_{{.*}} __popcountdi2
641; CHECKO2REM-NOT: mov {{.*}},0x0
; Label of the next function: it only bounds the negative check above to this
; function's body instead of letting it scan to the end of the disassembly.
; CHECKO2REM-LABEL: test_stacksave_noalloca
642; MIPS32-LABEL: test_popcount_64_ret_i32
643; MIPS32: jal {{.*}} __popcountdi2
644; MIPS32: sw v0,{{.*}}
645; MIPS32: sw v1,{{.*}}
646; MIPS32: lw v0,{{.*}}
647; MIPS32: lw ra,{{.*}}
648
; Calls llvm.stacksave and immediately passes the saved pointer to
; llvm.stackrestore, with no alloca in between.
649define internal void @test_stacksave_noalloca() {
650entry:
651  %sp = call i8* @llvm.stacksave()
652  call void @llvm.stackrestore(i8* %sp)
653  ret void
654}
655; CHECK-LABEL: test_stacksave_noalloca
656; CHECK: mov {{.*}},esp
657; CHECK: mov esp,{{.*}}
658; ARM32-LABEL: test_stacksave_noalloca
659; ARM32: mov {{.*}}, sp
660; ARM32: mov sp, {{.*}}
661; MIPS32-LABEL: test_stacksave_noalloca
662; MIPS32: 	sw	sp,{{.*}}
663; MIPS32: 	lw	[[REG:.*]],0(sp)
664; MIPS32: 	move	sp,[[REG]]
665
666declare i32 @foo(i32 %x)
667
; Performs three llvm.stacksave calls, each followed by a variable-sized
; alloca of %x * 4 bytes, with a call to @foo between the second and third.
; Each alloca is then written through an i32* cast. Only the first saved
; pointer (%sp1) is passed to llvm.stackrestore; %sp2 and %sp3 have no users.
668define internal void @test_stacksave_multiple(i32 %x) {
669entry:
670  %x_4 = mul i32 %x, 4
671  %sp1 = call i8* @llvm.stacksave()
672  %tmp1 = alloca i8, i32 %x_4, align 4
673
674  %sp2 = call i8* @llvm.stacksave()
675  %tmp2 = alloca i8, i32 %x_4, align 4
676
677  %y = call i32 @foo(i32 %x)
678
679  %sp3 = call i8* @llvm.stacksave()
680  %tmp3 = alloca i8, i32 %x_4, align 4
681
  ; The first alloca receives the result of @foo; the other two receive %x.
682  %__9 = bitcast i8* %tmp1 to i32*
683  store i32 %y, i32* %__9, align 1
684
685  %__10 = bitcast i8* %tmp2 to i32*
686  store i32 %x, i32* %__10, align 1
687
688  %__11 = bitcast i8* %tmp3 to i32*
689  store i32 %x, i32* %__11, align 1
690
691  call void @llvm.stackrestore(i8* %sp1)
692  ret void
693}
694; CHECK-LABEL: test_stacksave_multiple
695; lea is used to copy from esp for the allocas.
696; Otherwise, only one stacksave is live.
697; CHECK: mov ebp,esp
698; CHECK: mov {{.*}},esp
699; CHECK: lea {{.*}},[esp+0x10]
700; CHECK: lea {{.*}},[esp+0x10]
701; CHECK: call
702; CHECK: mov esp,{{.*}}
703; CHECK: mov esp,ebp
704; ARM32-LABEL: test_stacksave_multiple
705; ARM32: mov {{.*}}, sp
706; ARM32: mov {{.*}}, sp
707; ARM32: mov {{.*}}, sp
708; ARM32: mov sp, {{.*}}
709; MIPS32-LABEL: test_stacksave_multiple
710; MIPS32: 	sw	sp,[[MEMLOC:.*]]
711; MIPS32: 	sw	sp,{{.*}}
712; MIPS32: 	sw	sp,{{.*}}
713; MIPS32: 	lw	[[REG:.*]],[[MEMLOC]]
714; MIPS32: 	move	sp,[[REG]]
715