; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

; These are UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
; See PR34046 and PR16726 for motivating examples:
; https://bugs.llvm.org/show_bug.cgi?id=34046
; https://bugs.llvm.org/show_bug.cgi?id=16726
; Rotate-left of a zext'd i16 by a masked i32 amount is narrowed back to i16.
; The negate+mask replaces the UB-prone 'sub 16, %and' form of the shift amount.
define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
; CHECK-LABEL: @rotate_left_16bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %shift to i16
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT:    [[TMP3:%.*]] = sub i16 0, [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = and i16 [[TMP3]], 15
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i16 %v, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = shl i16 %v, [[TMP2]]
; CHECK-NEXT:    [[CONV2:%.*]] = or i16 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret i16 [[CONV2]]
;
  %and = and i32 %shift, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %and
  %sub = sub i32 16, %and
  %shr = lshr i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i16
  ret i16 %conv2
}

; Commute the 'or' operands and try a vector type.

; Same narrowing as above, but with the 'or' operands commuted and a 2 x i16
; vector type; the transform must handle splat-constant masks.
define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
; CHECK-LABEL: @rotate_left_commute_16bit_vec(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> %shift to <2 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i16> [[TMP1]], <i16 15, i16 15>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i16> zeroinitializer, [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i16> [[TMP3]], <i16 15, i16 15>
; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i16> %v, [[TMP2]]
; CHECK-NEXT:    [[TMP6:%.*]] = lshr <2 x i16> %v, [[TMP4]]
; CHECK-NEXT:    [[CONV2:%.*]] = or <2 x i16> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
;
  %and = and <2 x i32> %shift, <i32 15, i32 15>
  %conv = zext <2 x i16> %v to <2 x i32>
  %shl = shl <2 x i32> %conv, %and
  %sub = sub <2 x i32> <i32 16, i32 16>, %and
  %shr = lshr <2 x i32> %conv, %sub
  %or = or <2 x i32> %shl, %shr
  %conv2 = trunc <2 x i32> %or to <2 x i16>
  ret <2 x i16> %conv2
}

; Change the size, rotation direction (the subtract is on the left-shift), and mask op.

; Rotate-right of an i8 where the shift amount arrives as an i3 zext: the i3
; range (0-7) already bounds the amount, so no mask is needed after narrowing.
define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
; CHECK-LABEL: @rotate_right_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 %shift to i8
; CHECK-NEXT:    [[TMP2:%.*]] = sub i3 0, %shift
; CHECK-NEXT:    [[TMP3:%.*]] = zext i3 [[TMP2]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = shl i8 %v, [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 %v, [[TMP1]]
; CHECK-NEXT:    [[CONV2:%.*]] = or i8 [[TMP4]], [[TMP5]]
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = zext i3 %shift to i32
  %conv = zext i8 %v to i32
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shl, %shr
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; The shifted value does not need to be a zexted value; here it is masked.
; The shift mask could be less than the bitwidth, but this is still ok.

; The rotated value is masked (and i32 %v, 255) rather than zext'd, and the
; shift-amount mask (3) is narrower than the bitwidth; narrowing still applies.
define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
; CHECK-LABEL: @rotate_right_commute_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %shift to i8
; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw i8 0, [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 7
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 %v to i8
; CHECK-NEXT:    [[TMP6:%.*]] = lshr i8 [[TMP5]], [[TMP2]]
; CHECK-NEXT:    [[TMP7:%.*]] = shl i8 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[CONV2:%.*]] = or i8 [[TMP6]], [[TMP7]]
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = and i32 %shift, 3
  %conv = and i32 %v, 255
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; If the original source does not mask the shift amount,
; we still do the transform by adding masks to make it safe.

; No mask on the shift amount in the source: the transform inserts '& 7' masks
; itself so the narrowed rotate is UB-free.
define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotate8_not_safe(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %shamt to i8
; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP2]], 7
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 %v, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = shl i8 %v, [[TMP3]]
; CHECK-NEXT:    [[RET:%.*]] = or i8 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %sub = sub i32 8, %shamt
  %shr = lshr i32 %conv, %sub
  %shl = shl i32 %conv, %shamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

; The next two tests make sure we do not narrow (x << (x & 15)) | (x >> (-x & 15))
; when types have been promoted.
; FIXME: We should be able to narrow this.

; Negated-mask rotate with a same-width (i16) shift amount; the CHECK lines
; show it is currently NOT narrowed (see FIXME above this test).
define i16 @rotate16_neg_mask(i16 %v, i16 %shamt) {
; CHECK-LABEL: @rotate16_neg_mask(
; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i16 [[SHAMT:%.*]], 15
; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i16 [[RSHAMT]] to i32
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i16 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i16 [[NEG]], 15
; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i16 [[LSHAMT]] to i32
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
; CHECK-NEXT:    ret i16 [[RET]]
;
  %conv = zext i16 %v to i32
  %rshamt = and i16 %shamt, 15
  %rshamtconv = zext i16 %rshamt to i32
  %shr = lshr i32 %conv, %rshamtconv
  %neg = sub i16 0, %shamt
  %lshamt = and i16 %neg, 15
  %lshamtconv = zext i16 %lshamt to i32
  %shl = shl i32 %conv, %lshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}
; i8 variant of the negated-mask rotate with a same-width shift amount; also
; not narrowed yet (see FIXME above these tests).
define i8 @rotate8_neg_mask(i8 %v, i8 %shamt) {
; CHECK-LABEL: @rotate8_neg_mask(
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i8 [[SHAMT:%.*]], 7
; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i8 [[RSHAMT]] to i32
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i8 [[NEG]], 7
; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i8 [[LSHAMT]] to i32
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i8
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %rshamt = and i8 %shamt, 7
  %rshamtconv = zext i8 %rshamt to i32
  %shr = lshr i32 %conv, %rshamtconv
  %neg = sub i8 0, %shamt
  %lshamt = and i8 %neg, 7
  %lshamtconv = zext i8 %lshamt to i32
  %shl = shl i32 %conv, %lshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

; The next two tests have a shift amount that is already i32 so we would still
; need a truncate for it going into the rotate pattern.
; FIXME: We can narrow this, but we would still need a trunc on the shift amt.

; Negated-mask rotate whose shift amount is already i32 (wider than the value);
; narrowing would require a trunc of the amount, so it is not done yet.
define i16 @rotate16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
; CHECK-LABEL: @rotate16_neg_mask_wide_amount(
; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i32 [[SHAMT:%.*]], 15
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMT]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i32 [[NEG]], 15
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
; CHECK-NEXT:    ret i16 [[RET]]
;
  %conv = zext i16 %v to i32
  %rshamt = and i32 %shamt, 15
  %shr = lshr i32 %conv, %rshamt
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %neg, 15
  %shl = shl i32 %conv, %lshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}
; i8 variant with an i32 shift amount; same situation as the i16 test above.
define i8 @rotate8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotate8_neg_mask_wide_amount(
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i32 [[SHAMT:%.*]], 7
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMT]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i32 [[NEG]], 7
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i8
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %rshamt = and i32 %shamt, 7
  %shr = lshr i32 %conv, %rshamt
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %neg, 7
  %shl = shl i32 %conv, %lshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}