; RUN: llc -O2 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK
; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK

; The following IR
;
;   quot = n / d
;   rem  = n % d
;
; should be transformed into
;
;   quot = n / d
;   rem  = n - (n / d) * d
;
; during NVPTX isel, at -O2.  At -O0, we should leave it alone.

; Signed 32-bit quotient and remainder computed from the same (n, d) operands.
; Both results are stored through the pointer arguments.
; CHECK-LABEL: sdiv32(
define void @sdiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
  ; The divide is expected in both configurations; its result and operand
  ; registers are captured for the checks that follow.
  ; CHECK: div.s32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
  %quot = sdiv i32 %n, %d

  ; Without optimization the remainder must remain a rem instruction.
  ; O0: rem.s32
  ; With optimization the remainder is expected as num - quot * den.
  ; (This is unfortunately order-sensitive, even though mul is commutative.)
  ; O2: mul.lo.s32 [[mul:%r[0-9]+]], [[quot]], [[den]];
  ; O2: sub.s32 [[rem:%r[0-9]+]], [[num]], [[mul]]
  %rem = srem i32 %n, %d

  ; The stored values must be the captured quotient and remainder registers.
  ; O2: st{{.*}}[[quot]]
  store i32 %quot, i32* %quot_ret
  ; O2: st{{.*}}[[rem]]
  store i32 %rem, i32* %rem_ret
  ret void
}

; Unsigned 32-bit quotient and remainder computed from the same (n, d)
; operands. Both results are stored through the pointer arguments.
; CHECK-LABEL: udiv32(
define void @udiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
  ; The divide is expected in both configurations; its result and operand
  ; registers are captured for the checks that follow.
  ; CHECK: div.u32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
  %quot = udiv i32 %n, %d

  ; Without optimization the remainder must remain a rem instruction.
  ; O0: rem.u32

  ; Selection DAG doesn't know whether this is signed or unsigned
  ; multiplication and subtraction, but it doesn't make a difference either
  ; way.
  ; O2: mul.lo.{{u|s}}32 [[mul:%r[0-9]+]], [[quot]], [[den]];
  ; O2: sub.{{u|s}}32 [[rem:%r[0-9]+]], [[num]], [[mul]]
  %rem = urem i32 %n, %d

  ; The stored values must be the captured quotient and remainder registers.
  ; O2: st{{.*}}[[quot]]
  store i32 %quot, i32* %quot_ret
  ; O2: st{{.*}}[[rem]]
  store i32 %rem, i32* %rem_ret
  ret void
}

; Check that we don't perform this optimization if one operation is signed and
; the other isn't.
; Here an unsigned divide is paired with a signed remainder on the same
; operands; a rem.s32 must still follow the div.u32 in every configuration.
; CHECK-LABEL: mismatched_types1(
define void @mismatched_types1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
  ; CHECK: div.u32
  ; CHECK: rem.s32
  %quot = udiv i32 %n, %d
  %rem = srem i32 %n, %d
  store i32 %quot, i32* %quot_ret
  store i32 %rem, i32* %rem_ret
  ret void
}

; Signed divide paired with an unsigned remainder on the same operands; a
; rem.u32 must still follow the div.s32 in every configuration.
; CHECK-LABEL: mismatched_types2(
define void @mismatched_types2(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.u32
  %quot = sdiv i32 %n, %d
  %rem = urem i32 %n, %d
  store i32 %quot, i32* %quot_ret
  store i32 %rem, i32* %rem_ret
  ret void
}

; Check that we don't perform this optimization if the inputs to the div don't
; match the inputs to the rem.
; Here the remainder uses the divide's operands in swapped order (d rem n
; versus n div d), so a rem.s32 must still follow the div.s32.
; CHECK-LABEL: mismatched_inputs1(
define void @mismatched_inputs1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n, %d
  %rem = srem i32 %d, %n
  store i32 %quot, i32* %quot_ret
  store i32 %rem, i32* %rem_ret
  ret void
}

; The divide and the remainder share the denominator but use different
; numerators (n1 versus n2), so a rem.s32 must still follow the div.s32.
; CHECK-LABEL: mismatched_inputs2(
define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, i32* %quot_ret, i32* %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n1, %d
  %rem = srem i32 %n2, %d
  store i32 %quot, i32* %quot_ret
  store i32 %rem, i32* %rem_ret
  ret void
}

; The divide and the remainder share the numerator but use different
; denominators (d1 versus d2), so a rem.s32 must still follow the div.s32.
; CHECK-LABEL: mismatched_inputs3(
define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, i32* %quot_ret, i32* %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n, %d1
  %rem = srem i32 %n, %d2
  store i32 %quot, i32* %quot_ret
  store i32 %rem, i32* %rem_ret
  ret void
}
