• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3; RUN:		-mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE
10
; fold_srem_vec_1: lane-wise signed remainder of a <4 x i16> vector by four
; different non-power-of-two constants (95, -124, 98, -1003).
;
; For every llc configuration checked below the expected code contains no
; divide instruction. Each lane is moved to a GPR (vextuhrx/vextuhlx on pwr9,
; mffprd/mfvsrd plus rldicl/clrldi on pwr8), sign-extended with extsh and
; multiplied high (mulhw) by a per-divisor constant built with lis/ori. The
; lanes for 95 and -124 additionally add/subtract the lane value. The quotient
; is finished with srwi/srawi/add, multiplied back by the divisor (mulli) and
; subtracted from the lane value to give the remainder. The four results are
; recombined with vmrghh and vmrglw (little-endian) or vmrghw (big-endian).
;
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
11define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
12; P9LE-LABEL: fold_srem_vec_1:
13; P9LE:       # %bb.0:
14; P9LE-NEXT:    li r3, 0
15; P9LE-NEXT:    lis r4, -21386
16; P9LE-NEXT:    vextuhrx r3, r3, v2
17; P9LE-NEXT:    ori r4, r4, 37253
18; P9LE-NEXT:    extsh r3, r3
19; P9LE-NEXT:    mulhw r4, r3, r4
20; P9LE-NEXT:    add r4, r4, r3
21; P9LE-NEXT:    srwi r5, r4, 31
22; P9LE-NEXT:    srawi r4, r4, 6
23; P9LE-NEXT:    add r4, r4, r5
24; P9LE-NEXT:    mulli r4, r4, 95
25; P9LE-NEXT:    sub r3, r3, r4
26; P9LE-NEXT:    lis r4, 31710
27; P9LE-NEXT:    mtvsrd v3, r3
28; P9LE-NEXT:    li r3, 2
29; P9LE-NEXT:    ori r4, r4, 63421
30; P9LE-NEXT:    vextuhrx r3, r3, v2
31; P9LE-NEXT:    extsh r3, r3
32; P9LE-NEXT:    mulhw r4, r3, r4
33; P9LE-NEXT:    sub r4, r4, r3
34; P9LE-NEXT:    srwi r5, r4, 31
35; P9LE-NEXT:    srawi r4, r4, 6
36; P9LE-NEXT:    add r4, r4, r5
37; P9LE-NEXT:    mulli r4, r4, -124
38; P9LE-NEXT:    sub r3, r3, r4
39; P9LE-NEXT:    lis r4, 21399
40; P9LE-NEXT:    mtvsrd v4, r3
41; P9LE-NEXT:    li r3, 4
42; P9LE-NEXT:    ori r4, r4, 33437
43; P9LE-NEXT:    vextuhrx r3, r3, v2
44; P9LE-NEXT:    vmrghh v3, v4, v3
45; P9LE-NEXT:    extsh r3, r3
46; P9LE-NEXT:    mulhw r4, r3, r4
47; P9LE-NEXT:    srwi r5, r4, 31
48; P9LE-NEXT:    srawi r4, r4, 5
49; P9LE-NEXT:    add r4, r4, r5
50; P9LE-NEXT:    mulli r4, r4, 98
51; P9LE-NEXT:    sub r3, r3, r4
52; P9LE-NEXT:    lis r4, -16728
53; P9LE-NEXT:    mtvsrd v4, r3
54; P9LE-NEXT:    li r3, 6
55; P9LE-NEXT:    ori r4, r4, 63249
56; P9LE-NEXT:    vextuhrx r3, r3, v2
57; P9LE-NEXT:    extsh r3, r3
58; P9LE-NEXT:    mulhw r4, r3, r4
59; P9LE-NEXT:    srwi r5, r4, 31
60; P9LE-NEXT:    srawi r4, r4, 8
61; P9LE-NEXT:    add r4, r4, r5
62; P9LE-NEXT:    mulli r4, r4, -1003
63; P9LE-NEXT:    sub r3, r3, r4
64; P9LE-NEXT:    mtvsrd v2, r3
65; P9LE-NEXT:    vmrghh v2, v2, v4
66; P9LE-NEXT:    vmrglw v2, v2, v3
67; P9LE-NEXT:    blr
68;
69; P9BE-LABEL: fold_srem_vec_1:
70; P9BE:       # %bb.0:
71; P9BE-NEXT:    li r3, 2
72; P9BE-NEXT:    lis r4, 31710
73; P9BE-NEXT:    vextuhlx r3, r3, v2
74; P9BE-NEXT:    ori r4, r4, 63421
75; P9BE-NEXT:    extsh r3, r3
76; P9BE-NEXT:    mulhw r4, r3, r4
77; P9BE-NEXT:    sub r4, r4, r3
78; P9BE-NEXT:    srwi r5, r4, 31
79; P9BE-NEXT:    srawi r4, r4, 6
80; P9BE-NEXT:    add r4, r4, r5
81; P9BE-NEXT:    mulli r4, r4, -124
82; P9BE-NEXT:    sub r3, r3, r4
83; P9BE-NEXT:    lis r4, -21386
84; P9BE-NEXT:    sldi r3, r3, 48
85; P9BE-NEXT:    ori r4, r4, 37253
86; P9BE-NEXT:    mtvsrd v3, r3
87; P9BE-NEXT:    li r3, 0
88; P9BE-NEXT:    vextuhlx r3, r3, v2
89; P9BE-NEXT:    extsh r3, r3
90; P9BE-NEXT:    mulhw r4, r3, r4
91; P9BE-NEXT:    add r4, r4, r3
92; P9BE-NEXT:    srwi r5, r4, 31
93; P9BE-NEXT:    srawi r4, r4, 6
94; P9BE-NEXT:    add r4, r4, r5
95; P9BE-NEXT:    mulli r4, r4, 95
96; P9BE-NEXT:    sub r3, r3, r4
97; P9BE-NEXT:    lis r4, -16728
98; P9BE-NEXT:    sldi r3, r3, 48
99; P9BE-NEXT:    ori r4, r4, 63249
100; P9BE-NEXT:    mtvsrd v4, r3
101; P9BE-NEXT:    li r3, 6
102; P9BE-NEXT:    vextuhlx r3, r3, v2
103; P9BE-NEXT:    vmrghh v3, v4, v3
104; P9BE-NEXT:    extsh r3, r3
105; P9BE-NEXT:    mulhw r4, r3, r4
106; P9BE-NEXT:    srwi r5, r4, 31
107; P9BE-NEXT:    srawi r4, r4, 8
108; P9BE-NEXT:    add r4, r4, r5
109; P9BE-NEXT:    mulli r4, r4, -1003
110; P9BE-NEXT:    sub r3, r3, r4
111; P9BE-NEXT:    lis r4, 21399
112; P9BE-NEXT:    sldi r3, r3, 48
113; P9BE-NEXT:    ori r4, r4, 33437
114; P9BE-NEXT:    mtvsrd v4, r3
115; P9BE-NEXT:    li r3, 4
116; P9BE-NEXT:    vextuhlx r3, r3, v2
117; P9BE-NEXT:    extsh r3, r3
118; P9BE-NEXT:    mulhw r4, r3, r4
119; P9BE-NEXT:    srwi r5, r4, 31
120; P9BE-NEXT:    srawi r4, r4, 5
121; P9BE-NEXT:    add r4, r4, r5
122; P9BE-NEXT:    mulli r4, r4, 98
123; P9BE-NEXT:    sub r3, r3, r4
124; P9BE-NEXT:    sldi r3, r3, 48
125; P9BE-NEXT:    mtvsrd v2, r3
126; P9BE-NEXT:    vmrghh v2, v2, v4
127; P9BE-NEXT:    vmrghw v2, v3, v2
128; P9BE-NEXT:    blr
129;
130; P8LE-LABEL: fold_srem_vec_1:
131; P8LE:       # %bb.0:
132; P8LE-NEXT:    xxswapd vs0, v2
133; P8LE-NEXT:    lis r3, 21399
134; P8LE-NEXT:    lis r8, -16728
135; P8LE-NEXT:    lis r9, -21386
136; P8LE-NEXT:    lis r10, 31710
137; P8LE-NEXT:    ori r3, r3, 33437
138; P8LE-NEXT:    ori r8, r8, 63249
139; P8LE-NEXT:    ori r9, r9, 37253
140; P8LE-NEXT:    ori r10, r10, 63421
141; P8LE-NEXT:    mffprd r4, f0
142; P8LE-NEXT:    rldicl r5, r4, 32, 48
143; P8LE-NEXT:    rldicl r6, r4, 16, 48
144; P8LE-NEXT:    clrldi r7, r4, 48
145; P8LE-NEXT:    extsh r5, r5
146; P8LE-NEXT:    extsh r6, r6
147; P8LE-NEXT:    rldicl r4, r4, 48, 48
148; P8LE-NEXT:    extsh r7, r7
149; P8LE-NEXT:    mulhw r3, r5, r3
150; P8LE-NEXT:    extsh r4, r4
151; P8LE-NEXT:    mulhw r8, r6, r8
152; P8LE-NEXT:    mulhw r9, r7, r9
153; P8LE-NEXT:    mulhw r10, r4, r10
154; P8LE-NEXT:    srwi r11, r3, 31
155; P8LE-NEXT:    srawi r3, r3, 5
156; P8LE-NEXT:    add r3, r3, r11
157; P8LE-NEXT:    srwi r11, r8, 31
158; P8LE-NEXT:    add r9, r9, r7
159; P8LE-NEXT:    srawi r8, r8, 8
160; P8LE-NEXT:    sub r10, r10, r4
161; P8LE-NEXT:    add r8, r8, r11
162; P8LE-NEXT:    srwi r11, r9, 31
163; P8LE-NEXT:    srawi r9, r9, 6
164; P8LE-NEXT:    mulli r3, r3, 98
165; P8LE-NEXT:    add r9, r9, r11
166; P8LE-NEXT:    srwi r11, r10, 31
167; P8LE-NEXT:    srawi r10, r10, 6
168; P8LE-NEXT:    mulli r8, r8, -1003
169; P8LE-NEXT:    add r10, r10, r11
170; P8LE-NEXT:    mulli r9, r9, 95
171; P8LE-NEXT:    mulli r10, r10, -124
172; P8LE-NEXT:    sub r3, r5, r3
173; P8LE-NEXT:    mtvsrd v2, r3
174; P8LE-NEXT:    sub r5, r6, r8
175; P8LE-NEXT:    sub r3, r7, r9
176; P8LE-NEXT:    mtvsrd v3, r5
177; P8LE-NEXT:    sub r4, r4, r10
178; P8LE-NEXT:    mtvsrd v4, r3
179; P8LE-NEXT:    mtvsrd v5, r4
180; P8LE-NEXT:    vmrghh v2, v3, v2
181; P8LE-NEXT:    vmrghh v3, v5, v4
182; P8LE-NEXT:    vmrglw v2, v2, v3
183; P8LE-NEXT:    blr
184;
185; P8BE-LABEL: fold_srem_vec_1:
186; P8BE:       # %bb.0:
187; P8BE-NEXT:    mfvsrd r4, v2
188; P8BE-NEXT:    lis r3, -16728
189; P8BE-NEXT:    lis r8, 21399
190; P8BE-NEXT:    lis r9, 31710
191; P8BE-NEXT:    lis r10, -21386
192; P8BE-NEXT:    ori r3, r3, 63249
193; P8BE-NEXT:    ori r8, r8, 33437
194; P8BE-NEXT:    ori r9, r9, 63421
195; P8BE-NEXT:    ori r10, r10, 37253
196; P8BE-NEXT:    clrldi r5, r4, 48
197; P8BE-NEXT:    rldicl r6, r4, 48, 48
198; P8BE-NEXT:    rldicl r7, r4, 32, 48
199; P8BE-NEXT:    extsh r5, r5
200; P8BE-NEXT:    extsh r6, r6
201; P8BE-NEXT:    rldicl r4, r4, 16, 48
202; P8BE-NEXT:    extsh r7, r7
203; P8BE-NEXT:    mulhw r3, r5, r3
204; P8BE-NEXT:    extsh r4, r4
205; P8BE-NEXT:    mulhw r8, r6, r8
206; P8BE-NEXT:    mulhw r9, r7, r9
207; P8BE-NEXT:    mulhw r10, r4, r10
208; P8BE-NEXT:    srwi r11, r3, 31
209; P8BE-NEXT:    srawi r3, r3, 8
210; P8BE-NEXT:    add r3, r3, r11
211; P8BE-NEXT:    srwi r11, r8, 31
212; P8BE-NEXT:    sub r9, r9, r7
213; P8BE-NEXT:    srawi r8, r8, 5
214; P8BE-NEXT:    add r10, r10, r4
215; P8BE-NEXT:    add r8, r8, r11
216; P8BE-NEXT:    srwi r11, r9, 31
217; P8BE-NEXT:    srawi r9, r9, 6
218; P8BE-NEXT:    mulli r3, r3, -1003
219; P8BE-NEXT:    add r9, r9, r11
220; P8BE-NEXT:    srwi r11, r10, 31
221; P8BE-NEXT:    srawi r10, r10, 6
222; P8BE-NEXT:    mulli r8, r8, 98
223; P8BE-NEXT:    add r10, r10, r11
224; P8BE-NEXT:    mulli r9, r9, -124
225; P8BE-NEXT:    mulli r10, r10, 95
226; P8BE-NEXT:    sub r3, r5, r3
227; P8BE-NEXT:    sldi r3, r3, 48
228; P8BE-NEXT:    sub r5, r6, r8
229; P8BE-NEXT:    mtvsrd v2, r3
230; P8BE-NEXT:    sub r6, r7, r9
231; P8BE-NEXT:    sldi r3, r5, 48
232; P8BE-NEXT:    sub r4, r4, r10
233; P8BE-NEXT:    mtvsrd v3, r3
234; P8BE-NEXT:    sldi r3, r6, 48
235; P8BE-NEXT:    sldi r4, r4, 48
236; P8BE-NEXT:    mtvsrd v4, r3
237; P8BE-NEXT:    mtvsrd v5, r4
238; P8BE-NEXT:    vmrghh v2, v3, v2
239; P8BE-NEXT:    vmrghh v3, v5, v4
240; P8BE-NEXT:    vmrghw v2, v3, v2
241; P8BE-NEXT:    blr
; IR under test: one srem with a constant vector holding a distinct divisor per lane.
242  %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
243  ret <4 x i16> %1
244}
245
; fold_srem_vec_2: lane-wise signed remainder of a <4 x i16> vector where all
; four lanes use the same divisor, 95.
;
; The expected code uses no divide instruction. In every configuration the
; multiply-high constant (lis -21386 / ori 37253) is materialized once and
; reused by all four mulhw instructions; each lane then goes through the same
; add, srwi/srawi, add, mulli 95, sub sequence before the lanes are merged
; back into v2 with vmrghh and vmrglw/vmrghw.
;
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
246define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
247; P9LE-LABEL: fold_srem_vec_2:
248; P9LE:       # %bb.0:
249; P9LE-NEXT:    li r3, 0
250; P9LE-NEXT:    lis r4, -21386
251; P9LE-NEXT:    vextuhrx r3, r3, v2
252; P9LE-NEXT:    ori r4, r4, 37253
253; P9LE-NEXT:    extsh r3, r3
254; P9LE-NEXT:    mulhw r5, r3, r4
255; P9LE-NEXT:    add r5, r5, r3
256; P9LE-NEXT:    srwi r6, r5, 31
257; P9LE-NEXT:    srawi r5, r5, 6
258; P9LE-NEXT:    add r5, r5, r6
259; P9LE-NEXT:    mulli r5, r5, 95
260; P9LE-NEXT:    sub r3, r3, r5
261; P9LE-NEXT:    mtvsrd v3, r3
262; P9LE-NEXT:    li r3, 2
263; P9LE-NEXT:    vextuhrx r3, r3, v2
264; P9LE-NEXT:    extsh r3, r3
265; P9LE-NEXT:    mulhw r5, r3, r4
266; P9LE-NEXT:    add r5, r5, r3
267; P9LE-NEXT:    srwi r6, r5, 31
268; P9LE-NEXT:    srawi r5, r5, 6
269; P9LE-NEXT:    add r5, r5, r6
270; P9LE-NEXT:    mulli r5, r5, 95
271; P9LE-NEXT:    sub r3, r3, r5
272; P9LE-NEXT:    mtvsrd v4, r3
273; P9LE-NEXT:    li r3, 4
274; P9LE-NEXT:    vextuhrx r3, r3, v2
275; P9LE-NEXT:    vmrghh v3, v4, v3
276; P9LE-NEXT:    extsh r3, r3
277; P9LE-NEXT:    mulhw r5, r3, r4
278; P9LE-NEXT:    add r5, r5, r3
279; P9LE-NEXT:    srwi r6, r5, 31
280; P9LE-NEXT:    srawi r5, r5, 6
281; P9LE-NEXT:    add r5, r5, r6
282; P9LE-NEXT:    mulli r5, r5, 95
283; P9LE-NEXT:    sub r3, r3, r5
284; P9LE-NEXT:    mtvsrd v4, r3
285; P9LE-NEXT:    li r3, 6
286; P9LE-NEXT:    vextuhrx r3, r3, v2
287; P9LE-NEXT:    extsh r3, r3
288; P9LE-NEXT:    mulhw r4, r3, r4
289; P9LE-NEXT:    add r4, r4, r3
290; P9LE-NEXT:    srwi r5, r4, 31
291; P9LE-NEXT:    srawi r4, r4, 6
292; P9LE-NEXT:    add r4, r4, r5
293; P9LE-NEXT:    mulli r4, r4, 95
294; P9LE-NEXT:    sub r3, r3, r4
295; P9LE-NEXT:    mtvsrd v2, r3
296; P9LE-NEXT:    vmrghh v2, v2, v4
297; P9LE-NEXT:    vmrglw v2, v2, v3
298; P9LE-NEXT:    blr
299;
300; P9BE-LABEL: fold_srem_vec_2:
301; P9BE:       # %bb.0:
302; P9BE-NEXT:    li r3, 6
303; P9BE-NEXT:    lis r4, -21386
304; P9BE-NEXT:    vextuhlx r3, r3, v2
305; P9BE-NEXT:    ori r4, r4, 37253
306; P9BE-NEXT:    extsh r3, r3
307; P9BE-NEXT:    mulhw r5, r3, r4
308; P9BE-NEXT:    add r5, r5, r3
309; P9BE-NEXT:    srwi r6, r5, 31
310; P9BE-NEXT:    srawi r5, r5, 6
311; P9BE-NEXT:    add r5, r5, r6
312; P9BE-NEXT:    mulli r5, r5, 95
313; P9BE-NEXT:    sub r3, r3, r5
314; P9BE-NEXT:    sldi r3, r3, 48
315; P9BE-NEXT:    mtvsrd v3, r3
316; P9BE-NEXT:    li r3, 4
317; P9BE-NEXT:    vextuhlx r3, r3, v2
318; P9BE-NEXT:    extsh r3, r3
319; P9BE-NEXT:    mulhw r5, r3, r4
320; P9BE-NEXT:    add r5, r5, r3
321; P9BE-NEXT:    srwi r6, r5, 31
322; P9BE-NEXT:    srawi r5, r5, 6
323; P9BE-NEXT:    add r5, r5, r6
324; P9BE-NEXT:    mulli r5, r5, 95
325; P9BE-NEXT:    sub r3, r3, r5
326; P9BE-NEXT:    sldi r3, r3, 48
327; P9BE-NEXT:    mtvsrd v4, r3
328; P9BE-NEXT:    li r3, 2
329; P9BE-NEXT:    vextuhlx r3, r3, v2
330; P9BE-NEXT:    vmrghh v3, v4, v3
331; P9BE-NEXT:    extsh r3, r3
332; P9BE-NEXT:    mulhw r5, r3, r4
333; P9BE-NEXT:    add r5, r5, r3
334; P9BE-NEXT:    srwi r6, r5, 31
335; P9BE-NEXT:    srawi r5, r5, 6
336; P9BE-NEXT:    add r5, r5, r6
337; P9BE-NEXT:    mulli r5, r5, 95
338; P9BE-NEXT:    sub r3, r3, r5
339; P9BE-NEXT:    sldi r3, r3, 48
340; P9BE-NEXT:    mtvsrd v4, r3
341; P9BE-NEXT:    li r3, 0
342; P9BE-NEXT:    vextuhlx r3, r3, v2
343; P9BE-NEXT:    extsh r3, r3
344; P9BE-NEXT:    mulhw r4, r3, r4
345; P9BE-NEXT:    add r4, r4, r3
346; P9BE-NEXT:    srwi r5, r4, 31
347; P9BE-NEXT:    srawi r4, r4, 6
348; P9BE-NEXT:    add r4, r4, r5
349; P9BE-NEXT:    mulli r4, r4, 95
350; P9BE-NEXT:    sub r3, r3, r4
351; P9BE-NEXT:    sldi r3, r3, 48
352; P9BE-NEXT:    mtvsrd v2, r3
353; P9BE-NEXT:    vmrghh v2, v2, v4
354; P9BE-NEXT:    vmrghw v2, v2, v3
355; P9BE-NEXT:    blr
356;
357; P8LE-LABEL: fold_srem_vec_2:
358; P8LE:       # %bb.0:
359; P8LE-NEXT:    xxswapd vs0, v2
360; P8LE-NEXT:    lis r3, -21386
361; P8LE-NEXT:    ori r3, r3, 37253
362; P8LE-NEXT:    mffprd r4, f0
363; P8LE-NEXT:    clrldi r5, r4, 48
364; P8LE-NEXT:    rldicl r6, r4, 48, 48
365; P8LE-NEXT:    extsh r5, r5
366; P8LE-NEXT:    rldicl r7, r4, 32, 48
367; P8LE-NEXT:    extsh r6, r6
368; P8LE-NEXT:    mulhw r8, r5, r3
369; P8LE-NEXT:    rldicl r4, r4, 16, 48
370; P8LE-NEXT:    extsh r7, r7
371; P8LE-NEXT:    mulhw r9, r6, r3
372; P8LE-NEXT:    extsh r4, r4
373; P8LE-NEXT:    mulhw r10, r7, r3
374; P8LE-NEXT:    mulhw r3, r4, r3
375; P8LE-NEXT:    add r8, r8, r5
376; P8LE-NEXT:    add r9, r9, r6
377; P8LE-NEXT:    srwi r11, r8, 31
378; P8LE-NEXT:    srawi r8, r8, 6
379; P8LE-NEXT:    add r10, r10, r7
380; P8LE-NEXT:    add r3, r3, r4
381; P8LE-NEXT:    add r8, r8, r11
382; P8LE-NEXT:    srwi r11, r9, 31
383; P8LE-NEXT:    srawi r9, r9, 6
384; P8LE-NEXT:    mulli r8, r8, 95
385; P8LE-NEXT:    add r9, r9, r11
386; P8LE-NEXT:    srwi r11, r10, 31
387; P8LE-NEXT:    srawi r10, r10, 6
388; P8LE-NEXT:    mulli r9, r9, 95
389; P8LE-NEXT:    add r10, r10, r11
390; P8LE-NEXT:    srwi r11, r3, 31
391; P8LE-NEXT:    srawi r3, r3, 6
392; P8LE-NEXT:    mulli r10, r10, 95
393; P8LE-NEXT:    sub r5, r5, r8
394; P8LE-NEXT:    add r3, r3, r11
395; P8LE-NEXT:    mtvsrd v2, r5
396; P8LE-NEXT:    mulli r3, r3, 95
397; P8LE-NEXT:    sub r6, r6, r9
398; P8LE-NEXT:    mtvsrd v3, r6
399; P8LE-NEXT:    sub r5, r7, r10
400; P8LE-NEXT:    mtvsrd v4, r5
401; P8LE-NEXT:    sub r3, r4, r3
402; P8LE-NEXT:    vmrghh v2, v3, v2
403; P8LE-NEXT:    mtvsrd v5, r3
404; P8LE-NEXT:    vmrghh v3, v5, v4
405; P8LE-NEXT:    vmrglw v2, v3, v2
406; P8LE-NEXT:    blr
407;
408; P8BE-LABEL: fold_srem_vec_2:
409; P8BE:       # %bb.0:
410; P8BE-NEXT:    mfvsrd r4, v2
411; P8BE-NEXT:    lis r3, -21386
412; P8BE-NEXT:    ori r3, r3, 37253
413; P8BE-NEXT:    clrldi r5, r4, 48
414; P8BE-NEXT:    rldicl r6, r4, 48, 48
415; P8BE-NEXT:    extsh r5, r5
416; P8BE-NEXT:    rldicl r7, r4, 32, 48
417; P8BE-NEXT:    extsh r6, r6
418; P8BE-NEXT:    mulhw r8, r5, r3
419; P8BE-NEXT:    rldicl r4, r4, 16, 48
420; P8BE-NEXT:    extsh r7, r7
421; P8BE-NEXT:    mulhw r9, r6, r3
422; P8BE-NEXT:    extsh r4, r4
423; P8BE-NEXT:    mulhw r10, r7, r3
424; P8BE-NEXT:    mulhw r3, r4, r3
425; P8BE-NEXT:    add r8, r8, r5
426; P8BE-NEXT:    add r9, r9, r6
427; P8BE-NEXT:    srwi r11, r8, 31
428; P8BE-NEXT:    srawi r8, r8, 6
429; P8BE-NEXT:    add r10, r10, r7
430; P8BE-NEXT:    add r3, r3, r4
431; P8BE-NEXT:    add r8, r8, r11
432; P8BE-NEXT:    srwi r11, r9, 31
433; P8BE-NEXT:    srawi r9, r9, 6
434; P8BE-NEXT:    mulli r8, r8, 95
435; P8BE-NEXT:    add r9, r9, r11
436; P8BE-NEXT:    srwi r11, r10, 31
437; P8BE-NEXT:    srawi r10, r10, 6
438; P8BE-NEXT:    mulli r9, r9, 95
439; P8BE-NEXT:    add r10, r10, r11
440; P8BE-NEXT:    srwi r11, r3, 31
441; P8BE-NEXT:    srawi r3, r3, 6
442; P8BE-NEXT:    mulli r10, r10, 95
443; P8BE-NEXT:    sub r5, r5, r8
444; P8BE-NEXT:    add r3, r3, r11
445; P8BE-NEXT:    sldi r5, r5, 48
446; P8BE-NEXT:    mulli r3, r3, 95
447; P8BE-NEXT:    sub r6, r6, r9
448; P8BE-NEXT:    mtvsrd v2, r5
449; P8BE-NEXT:    sldi r6, r6, 48
450; P8BE-NEXT:    sub r7, r7, r10
451; P8BE-NEXT:    mtvsrd v3, r6
452; P8BE-NEXT:    sub r3, r4, r3
453; P8BE-NEXT:    sldi r4, r7, 48
454; P8BE-NEXT:    vmrghh v2, v3, v2
455; P8BE-NEXT:    sldi r3, r3, 48
456; P8BE-NEXT:    mtvsrd v4, r4
457; P8BE-NEXT:    mtvsrd v5, r3
458; P8BE-NEXT:    vmrghh v3, v5, v4
459; P8BE-NEXT:    vmrghw v2, v3, v2
460; P8BE-NEXT:    blr
; IR under test: one srem with a splat constant divisor of 95.
461  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
462  ret <4 x i16> %1
463}
464
465
466; Don't fold if we can combine srem with sdiv.
; combine_srem_sdiv: computes (x srem 95) + (x sdiv 95) lane-wise on a
; <4 x i16> vector.
;
; The expected code for each configuration contains exactly four mulhw
; instructions, i.e. one quotient computation per lane. That quotient is used
; twice: it is multiplied by 95 (mulli) and subtracted from the lane value to
; form the remainder, and it is also moved into a vector register on its own.
; The remainder vector and the quotient vector are then summed with a single
; vadduhm.
;
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
467define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
468; P9LE-LABEL: combine_srem_sdiv:
469; P9LE:       # %bb.0:
470; P9LE-NEXT:    li r3, 0
471; P9LE-NEXT:    lis r4, -21386
472; P9LE-NEXT:    vextuhrx r3, r3, v2
473; P9LE-NEXT:    ori r4, r4, 37253
474; P9LE-NEXT:    extsh r3, r3
475; P9LE-NEXT:    mulhw r5, r3, r4
476; P9LE-NEXT:    add r5, r5, r3
477; P9LE-NEXT:    srwi r6, r5, 31
478; P9LE-NEXT:    srawi r5, r5, 6
479; P9LE-NEXT:    add r5, r5, r6
480; P9LE-NEXT:    mulli r6, r5, 95
481; P9LE-NEXT:    sub r3, r3, r6
482; P9LE-NEXT:    mtvsrd v3, r3
483; P9LE-NEXT:    li r3, 2
484; P9LE-NEXT:    vextuhrx r3, r3, v2
485; P9LE-NEXT:    extsh r6, r3
486; P9LE-NEXT:    mulhw r7, r6, r4
487; P9LE-NEXT:    add r6, r7, r6
488; P9LE-NEXT:    srwi r7, r6, 31
489; P9LE-NEXT:    srawi r6, r6, 6
490; P9LE-NEXT:    add r6, r6, r7
491; P9LE-NEXT:    mulli r7, r6, 95
492; P9LE-NEXT:    sub r3, r3, r7
493; P9LE-NEXT:    mtvsrd v4, r3
494; P9LE-NEXT:    li r3, 4
495; P9LE-NEXT:    vextuhrx r3, r3, v2
496; P9LE-NEXT:    vmrghh v3, v4, v3
497; P9LE-NEXT:    extsh r7, r3
498; P9LE-NEXT:    mulhw r8, r7, r4
499; P9LE-NEXT:    add r7, r8, r7
500; P9LE-NEXT:    srwi r8, r7, 31
501; P9LE-NEXT:    srawi r7, r7, 6
502; P9LE-NEXT:    add r7, r7, r8
503; P9LE-NEXT:    mulli r8, r7, 95
504; P9LE-NEXT:    sub r3, r3, r8
505; P9LE-NEXT:    mtvsrd v4, r3
506; P9LE-NEXT:    li r3, 6
507; P9LE-NEXT:    vextuhrx r3, r3, v2
508; P9LE-NEXT:    extsh r8, r3
509; P9LE-NEXT:    mulhw r4, r8, r4
510; P9LE-NEXT:    add r4, r4, r8
511; P9LE-NEXT:    srwi r8, r4, 31
512; P9LE-NEXT:    srawi r4, r4, 6
513; P9LE-NEXT:    add r4, r4, r8
514; P9LE-NEXT:    mulli r8, r4, 95
515; P9LE-NEXT:    mtvsrd v5, r4
516; P9LE-NEXT:    sub r3, r3, r8
517; P9LE-NEXT:    mtvsrd v2, r3
518; P9LE-NEXT:    vmrghh v2, v2, v4
519; P9LE-NEXT:    mtvsrd v4, r6
520; P9LE-NEXT:    vmrglw v2, v2, v3
521; P9LE-NEXT:    mtvsrd v3, r5
522; P9LE-NEXT:    vmrghh v3, v4, v3
523; P9LE-NEXT:    mtvsrd v4, r7
524; P9LE-NEXT:    vmrghh v4, v5, v4
525; P9LE-NEXT:    vmrglw v3, v4, v3
526; P9LE-NEXT:    vadduhm v2, v2, v3
527; P9LE-NEXT:    blr
528;
529; P9BE-LABEL: combine_srem_sdiv:
530; P9BE:       # %bb.0:
531; P9BE-NEXT:    li r3, 6
532; P9BE-NEXT:    lis r5, -21386
533; P9BE-NEXT:    vextuhlx r3, r3, v2
534; P9BE-NEXT:    ori r5, r5, 37253
535; P9BE-NEXT:    extsh r4, r3
536; P9BE-NEXT:    mulhw r6, r4, r5
537; P9BE-NEXT:    add r4, r6, r4
538; P9BE-NEXT:    srwi r6, r4, 31
539; P9BE-NEXT:    srawi r4, r4, 6
540; P9BE-NEXT:    add r4, r4, r6
541; P9BE-NEXT:    mulli r6, r4, 95
542; P9BE-NEXT:    sub r3, r3, r6
543; P9BE-NEXT:    sldi r3, r3, 48
544; P9BE-NEXT:    mtvsrd v3, r3
545; P9BE-NEXT:    li r3, 4
546; P9BE-NEXT:    vextuhlx r3, r3, v2
547; P9BE-NEXT:    extsh r6, r3
548; P9BE-NEXT:    mulhw r7, r6, r5
549; P9BE-NEXT:    add r6, r7, r6
550; P9BE-NEXT:    srwi r7, r6, 31
551; P9BE-NEXT:    srawi r6, r6, 6
552; P9BE-NEXT:    add r6, r6, r7
553; P9BE-NEXT:    mulli r7, r6, 95
554; P9BE-NEXT:    sub r3, r3, r7
555; P9BE-NEXT:    sldi r3, r3, 48
556; P9BE-NEXT:    mtvsrd v4, r3
557; P9BE-NEXT:    li r3, 2
558; P9BE-NEXT:    vextuhlx r3, r3, v2
559; P9BE-NEXT:    vmrghh v3, v4, v3
560; P9BE-NEXT:    extsh r7, r3
561; P9BE-NEXT:    mulhw r8, r7, r5
562; P9BE-NEXT:    add r7, r8, r7
563; P9BE-NEXT:    srwi r8, r7, 31
564; P9BE-NEXT:    srawi r7, r7, 6
565; P9BE-NEXT:    add r7, r7, r8
566; P9BE-NEXT:    mulli r8, r7, 95
567; P9BE-NEXT:    sub r3, r3, r8
568; P9BE-NEXT:    sldi r3, r3, 48
569; P9BE-NEXT:    mtvsrd v4, r3
570; P9BE-NEXT:    li r3, 0
571; P9BE-NEXT:    vextuhlx r3, r3, v2
572; P9BE-NEXT:    extsh r3, r3
573; P9BE-NEXT:    mulhw r5, r3, r5
574; P9BE-NEXT:    add r5, r5, r3
575; P9BE-NEXT:    srwi r8, r5, 31
576; P9BE-NEXT:    srawi r5, r5, 6
577; P9BE-NEXT:    add r5, r5, r8
578; P9BE-NEXT:    mulli r8, r5, 95
579; P9BE-NEXT:    sub r3, r3, r8
580; P9BE-NEXT:    sldi r3, r3, 48
581; P9BE-NEXT:    mtvsrd v2, r3
582; P9BE-NEXT:    sldi r3, r4, 48
583; P9BE-NEXT:    vmrghh v2, v2, v4
584; P9BE-NEXT:    vmrghw v2, v2, v3
585; P9BE-NEXT:    mtvsrd v3, r3
586; P9BE-NEXT:    sldi r3, r6, 48
587; P9BE-NEXT:    mtvsrd v4, r3
588; P9BE-NEXT:    sldi r3, r7, 48
589; P9BE-NEXT:    vmrghh v3, v4, v3
590; P9BE-NEXT:    mtvsrd v4, r3
591; P9BE-NEXT:    sldi r3, r5, 48
592; P9BE-NEXT:    mtvsrd v5, r3
593; P9BE-NEXT:    vmrghh v4, v5, v4
594; P9BE-NEXT:    vmrghw v3, v4, v3
595; P9BE-NEXT:    vadduhm v2, v2, v3
596; P9BE-NEXT:    blr
597;
598; P8LE-LABEL: combine_srem_sdiv:
599; P8LE:       # %bb.0:
600; P8LE-NEXT:    xxswapd vs0, v2
601; P8LE-NEXT:    lis r3, -21386
602; P8LE-NEXT:    ori r3, r3, 37253
603; P8LE-NEXT:    mffprd r4, f0
604; P8LE-NEXT:    clrldi r5, r4, 48
605; P8LE-NEXT:    rldicl r6, r4, 48, 48
606; P8LE-NEXT:    rldicl r7, r4, 32, 48
607; P8LE-NEXT:    extsh r5, r5
608; P8LE-NEXT:    extsh r8, r6
609; P8LE-NEXT:    extsh r9, r7
610; P8LE-NEXT:    mulhw r10, r5, r3
611; P8LE-NEXT:    mulhw r11, r8, r3
612; P8LE-NEXT:    rldicl r4, r4, 16, 48
613; P8LE-NEXT:    mulhw r12, r9, r3
614; P8LE-NEXT:    extsh r0, r4
615; P8LE-NEXT:    mulhw r3, r0, r3
616; P8LE-NEXT:    add r10, r10, r5
617; P8LE-NEXT:    add r8, r11, r8
618; P8LE-NEXT:    srwi r11, r10, 31
619; P8LE-NEXT:    add r9, r12, r9
620; P8LE-NEXT:    srawi r10, r10, 6
621; P8LE-NEXT:    srawi r12, r8, 6
622; P8LE-NEXT:    srwi r8, r8, 31
623; P8LE-NEXT:    add r10, r10, r11
624; P8LE-NEXT:    add r3, r3, r0
625; P8LE-NEXT:    srawi r11, r9, 6
626; P8LE-NEXT:    srwi r9, r9, 31
627; P8LE-NEXT:    add r8, r12, r8
628; P8LE-NEXT:    mtvsrd v2, r10
629; P8LE-NEXT:    mulli r12, r10, 95
630; P8LE-NEXT:    add r9, r11, r9
631; P8LE-NEXT:    srwi r11, r3, 31
632; P8LE-NEXT:    mtvsrd v3, r8
633; P8LE-NEXT:    srawi r3, r3, 6
634; P8LE-NEXT:    mulli r10, r8, 95
635; P8LE-NEXT:    mtvsrd v4, r9
636; P8LE-NEXT:    add r3, r3, r11
637; P8LE-NEXT:    mulli r8, r9, 95
638; P8LE-NEXT:    vmrghh v2, v3, v2
639; P8LE-NEXT:    mulli r9, r3, 95
640; P8LE-NEXT:    sub r5, r5, r12
641; P8LE-NEXT:    sub r6, r6, r10
642; P8LE-NEXT:    mtvsrd v3, r5
643; P8LE-NEXT:    mtvsrd v5, r6
644; P8LE-NEXT:    sub r5, r7, r8
645; P8LE-NEXT:    sub r4, r4, r9
646; P8LE-NEXT:    mtvsrd v0, r5
647; P8LE-NEXT:    mtvsrd v1, r4
648; P8LE-NEXT:    vmrghh v3, v5, v3
649; P8LE-NEXT:    mtvsrd v5, r3
650; P8LE-NEXT:    vmrghh v0, v1, v0
651; P8LE-NEXT:    vmrghh v4, v5, v4
652; P8LE-NEXT:    vmrglw v3, v0, v3
653; P8LE-NEXT:    vmrglw v2, v4, v2
654; P8LE-NEXT:    vadduhm v2, v3, v2
655; P8LE-NEXT:    blr
656;
657; P8BE-LABEL: combine_srem_sdiv:
658; P8BE:       # %bb.0:
659; P8BE-NEXT:    mfvsrd r5, v2
660; P8BE-NEXT:    lis r4, -21386
661; P8BE-NEXT:    ori r4, r4, 37253
662; P8BE-NEXT:    clrldi r3, r5, 48
663; P8BE-NEXT:    rldicl r6, r5, 48, 48
664; P8BE-NEXT:    extsh r8, r3
665; P8BE-NEXT:    rldicl r7, r5, 32, 48
666; P8BE-NEXT:    extsh r9, r6
667; P8BE-NEXT:    rldicl r5, r5, 16, 48
668; P8BE-NEXT:    mulhw r11, r8, r4
669; P8BE-NEXT:    extsh r10, r7
670; P8BE-NEXT:    extsh r5, r5
671; P8BE-NEXT:    mulhw r12, r9, r4
672; P8BE-NEXT:    mulhw r0, r10, r4
673; P8BE-NEXT:    mulhw r4, r5, r4
674; P8BE-NEXT:    add r8, r11, r8
675; P8BE-NEXT:    add r9, r12, r9
676; P8BE-NEXT:    srawi r11, r8, 6
677; P8BE-NEXT:    srwi r8, r8, 31
678; P8BE-NEXT:    add r10, r0, r10
679; P8BE-NEXT:    add r4, r4, r5
680; P8BE-NEXT:    add r8, r11, r8
681; P8BE-NEXT:    srawi r12, r9, 6
682; P8BE-NEXT:    srwi r9, r9, 31
683; P8BE-NEXT:    srawi r0, r10, 6
684; P8BE-NEXT:    srawi r11, r4, 6
685; P8BE-NEXT:    srwi r10, r10, 31
686; P8BE-NEXT:    add r9, r12, r9
687; P8BE-NEXT:    srwi r4, r4, 31
688; P8BE-NEXT:    mulli r12, r8, 95
689; P8BE-NEXT:    add r10, r0, r10
690; P8BE-NEXT:    add r4, r11, r4
691; P8BE-NEXT:    mulli r0, r9, 95
692; P8BE-NEXT:    sldi r9, r9, 48
693; P8BE-NEXT:    sldi r8, r8, 48
694; P8BE-NEXT:    mtvsrd v3, r9
695; P8BE-NEXT:    mulli r9, r4, 95
696; P8BE-NEXT:    mtvsrd v2, r8
697; P8BE-NEXT:    mulli r8, r10, 95
698; P8BE-NEXT:    sldi r10, r10, 48
699; P8BE-NEXT:    sub r3, r3, r12
700; P8BE-NEXT:    mtvsrd v4, r10
701; P8BE-NEXT:    sub r6, r6, r0
702; P8BE-NEXT:    sldi r3, r3, 48
703; P8BE-NEXT:    vmrghh v2, v3, v2
704; P8BE-NEXT:    sldi r6, r6, 48
705; P8BE-NEXT:    mtvsrd v3, r3
706; P8BE-NEXT:    sub r3, r5, r9
707; P8BE-NEXT:    sub r7, r7, r8
708; P8BE-NEXT:    mtvsrd v5, r6
709; P8BE-NEXT:    sldi r3, r3, 48
710; P8BE-NEXT:    sldi r5, r7, 48
711; P8BE-NEXT:    mtvsrd v1, r3
712; P8BE-NEXT:    sldi r3, r4, 48
713; P8BE-NEXT:    mtvsrd v0, r5
714; P8BE-NEXT:    vmrghh v3, v5, v3
715; P8BE-NEXT:    mtvsrd v5, r3
716; P8BE-NEXT:    vmrghh v0, v1, v0
717; P8BE-NEXT:    vmrghh v4, v5, v4
718; P8BE-NEXT:    vmrghw v3, v0, v3
719; P8BE-NEXT:    vmrghw v2, v4, v2
720; P8BE-NEXT:    vadduhm v2, v3, v2
721; P8BE-NEXT:    blr
; IR under test: srem and sdiv of the same value by the same splat constant,
; with both results consumed by the add.
722  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
723  %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
724  %3 = add <4 x i16> %1, %2
725  ret <4 x i16> %3
726}
727
728; Don't fold for divisors that are a power of two.
; dont_fold_srem_power_of_two: lane-wise signed remainder of a <4 x i16>
; vector by the constants 64, 32, 8 and 95.
;
; In the expected code the three power-of-two lanes do not use a multiply-high
; constant. Each is handled with srawi / addze / slwi / sub, using shift
; amounts 6, 5 and 3 for divisors 64, 32 and 8 respectively. Only the lane
; with divisor 95 goes through the lis/ori + mulhw + mulli 95 sequence, so
; each configuration contains exactly one mulhw.
;
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
729define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
730; P9LE-LABEL: dont_fold_srem_power_of_two:
731; P9LE:       # %bb.0:
732; P9LE-NEXT:    li r3, 0
733; P9LE-NEXT:    vextuhrx r3, r3, v2
734; P9LE-NEXT:    extsh r3, r3
735; P9LE-NEXT:    srawi r4, r3, 6
736; P9LE-NEXT:    addze r4, r4
737; P9LE-NEXT:    slwi r4, r4, 6
738; P9LE-NEXT:    sub r3, r3, r4
739; P9LE-NEXT:    mtvsrd v3, r3
740; P9LE-NEXT:    li r3, 2
741; P9LE-NEXT:    vextuhrx r3, r3, v2
742; P9LE-NEXT:    extsh r3, r3
743; P9LE-NEXT:    srawi r4, r3, 5
744; P9LE-NEXT:    addze r4, r4
745; P9LE-NEXT:    slwi r4, r4, 5
746; P9LE-NEXT:    sub r3, r3, r4
747; P9LE-NEXT:    lis r4, -21386
748; P9LE-NEXT:    mtvsrd v4, r3
749; P9LE-NEXT:    li r3, 6
750; P9LE-NEXT:    ori r4, r4, 37253
751; P9LE-NEXT:    vextuhrx r3, r3, v2
752; P9LE-NEXT:    vmrghh v3, v4, v3
753; P9LE-NEXT:    extsh r3, r3
754; P9LE-NEXT:    mulhw r4, r3, r4
755; P9LE-NEXT:    add r4, r4, r3
756; P9LE-NEXT:    srwi r5, r4, 31
757; P9LE-NEXT:    srawi r4, r4, 6
758; P9LE-NEXT:    add r4, r4, r5
759; P9LE-NEXT:    mulli r4, r4, 95
760; P9LE-NEXT:    sub r3, r3, r4
761; P9LE-NEXT:    mtvsrd v4, r3
762; P9LE-NEXT:    li r3, 4
763; P9LE-NEXT:    vextuhrx r3, r3, v2
764; P9LE-NEXT:    extsh r3, r3
765; P9LE-NEXT:    srawi r4, r3, 3
766; P9LE-NEXT:    addze r4, r4
767; P9LE-NEXT:    slwi r4, r4, 3
768; P9LE-NEXT:    sub r3, r3, r4
769; P9LE-NEXT:    mtvsrd v2, r3
770; P9LE-NEXT:    vmrghh v2, v4, v2
771; P9LE-NEXT:    vmrglw v2, v2, v3
772; P9LE-NEXT:    blr
773;
774; P9BE-LABEL: dont_fold_srem_power_of_two:
775; P9BE:       # %bb.0:
776; P9BE-NEXT:    li r3, 2
777; P9BE-NEXT:    vextuhlx r3, r3, v2
778; P9BE-NEXT:    extsh r3, r3
779; P9BE-NEXT:    srawi r4, r3, 5
780; P9BE-NEXT:    addze r4, r4
781; P9BE-NEXT:    slwi r4, r4, 5
782; P9BE-NEXT:    sub r3, r3, r4
783; P9BE-NEXT:    sldi r3, r3, 48
784; P9BE-NEXT:    mtvsrd v3, r3
785; P9BE-NEXT:    li r3, 0
786; P9BE-NEXT:    vextuhlx r3, r3, v2
787; P9BE-NEXT:    extsh r3, r3
788; P9BE-NEXT:    srawi r4, r3, 6
789; P9BE-NEXT:    addze r4, r4
790; P9BE-NEXT:    slwi r4, r4, 6
791; P9BE-NEXT:    sub r3, r3, r4
792; P9BE-NEXT:    lis r4, -21386
793; P9BE-NEXT:    sldi r3, r3, 48
794; P9BE-NEXT:    ori r4, r4, 37253
795; P9BE-NEXT:    mtvsrd v4, r3
796; P9BE-NEXT:    li r3, 6
797; P9BE-NEXT:    vextuhlx r3, r3, v2
798; P9BE-NEXT:    vmrghh v3, v4, v3
799; P9BE-NEXT:    extsh r3, r3
800; P9BE-NEXT:    mulhw r4, r3, r4
801; P9BE-NEXT:    add r4, r4, r3
802; P9BE-NEXT:    srwi r5, r4, 31
803; P9BE-NEXT:    srawi r4, r4, 6
804; P9BE-NEXT:    add r4, r4, r5
805; P9BE-NEXT:    mulli r4, r4, 95
806; P9BE-NEXT:    sub r3, r3, r4
807; P9BE-NEXT:    sldi r3, r3, 48
808; P9BE-NEXT:    mtvsrd v4, r3
809; P9BE-NEXT:    li r3, 4
810; P9BE-NEXT:    vextuhlx r3, r3, v2
811; P9BE-NEXT:    extsh r3, r3
812; P9BE-NEXT:    srawi r4, r3, 3
813; P9BE-NEXT:    addze r4, r4
814; P9BE-NEXT:    slwi r4, r4, 3
815; P9BE-NEXT:    sub r3, r3, r4
816; P9BE-NEXT:    sldi r3, r3, 48
817; P9BE-NEXT:    mtvsrd v2, r3
818; P9BE-NEXT:    vmrghh v2, v2, v4
819; P9BE-NEXT:    vmrghw v2, v3, v2
820; P9BE-NEXT:    blr
821;
822; P8LE-LABEL: dont_fold_srem_power_of_two:
823; P8LE:       # %bb.0:
824; P8LE-NEXT:    xxswapd vs0, v2
825; P8LE-NEXT:    lis r3, -21386
826; P8LE-NEXT:    ori r3, r3, 37253
827; P8LE-NEXT:    mffprd r4, f0
828; P8LE-NEXT:    rldicl r5, r4, 16, 48
829; P8LE-NEXT:    clrldi r6, r4, 48
830; P8LE-NEXT:    extsh r5, r5
831; P8LE-NEXT:    extsh r6, r6
832; P8LE-NEXT:    mulhw r3, r5, r3
833; P8LE-NEXT:    rldicl r7, r4, 48, 48
834; P8LE-NEXT:    srawi r8, r6, 6
835; P8LE-NEXT:    extsh r7, r7
836; P8LE-NEXT:    addze r8, r8
837; P8LE-NEXT:    rldicl r4, r4, 32, 48
838; P8LE-NEXT:    srawi r9, r7, 5
839; P8LE-NEXT:    extsh r4, r4
840; P8LE-NEXT:    slwi r8, r8, 6
841; P8LE-NEXT:    add r3, r3, r5
842; P8LE-NEXT:    addze r9, r9
843; P8LE-NEXT:    sub r6, r6, r8
844; P8LE-NEXT:    srwi r10, r3, 31
845; P8LE-NEXT:    srawi r3, r3, 6
846; P8LE-NEXT:    slwi r8, r9, 5
847; P8LE-NEXT:    mtvsrd v2, r6
848; P8LE-NEXT:    add r3, r3, r10
849; P8LE-NEXT:    srawi r9, r4, 3
850; P8LE-NEXT:    sub r6, r7, r8
851; P8LE-NEXT:    mulli r3, r3, 95
852; P8LE-NEXT:    addze r7, r9
853; P8LE-NEXT:    mtvsrd v3, r6
854; P8LE-NEXT:    vmrghh v2, v3, v2
855; P8LE-NEXT:    sub r3, r5, r3
856; P8LE-NEXT:    slwi r5, r7, 3
857; P8LE-NEXT:    sub r4, r4, r5
858; P8LE-NEXT:    mtvsrd v4, r3
859; P8LE-NEXT:    mtvsrd v5, r4
860; P8LE-NEXT:    vmrghh v3, v4, v5
861; P8LE-NEXT:    vmrglw v2, v3, v2
862; P8LE-NEXT:    blr
863;
864; P8BE-LABEL: dont_fold_srem_power_of_two:
865; P8BE:       # %bb.0:
866; P8BE-NEXT:    mfvsrd r4, v2
867; P8BE-NEXT:    lis r3, -21386
868; P8BE-NEXT:    ori r3, r3, 37253
869; P8BE-NEXT:    clrldi r5, r4, 48
870; P8BE-NEXT:    rldicl r6, r4, 32, 48
871; P8BE-NEXT:    extsh r5, r5
872; P8BE-NEXT:    extsh r6, r6
873; P8BE-NEXT:    mulhw r3, r5, r3
874; P8BE-NEXT:    rldicl r7, r4, 16, 48
875; P8BE-NEXT:    srawi r8, r6, 5
876; P8BE-NEXT:    extsh r7, r7
877; P8BE-NEXT:    addze r8, r8
878; P8BE-NEXT:    rldicl r4, r4, 48, 48
879; P8BE-NEXT:    srawi r9, r7, 6
880; P8BE-NEXT:    extsh r4, r4
881; P8BE-NEXT:    slwi r8, r8, 5
882; P8BE-NEXT:    add r3, r3, r5
883; P8BE-NEXT:    addze r9, r9
884; P8BE-NEXT:    sub r6, r6, r8
885; P8BE-NEXT:    srwi r10, r3, 31
886; P8BE-NEXT:    srawi r3, r3, 6
887; P8BE-NEXT:    slwi r8, r9, 6
888; P8BE-NEXT:    add r3, r3, r10
889; P8BE-NEXT:    srawi r9, r4, 3
890; P8BE-NEXT:    sub r7, r7, r8
891; P8BE-NEXT:    mulli r3, r3, 95
892; P8BE-NEXT:    sldi r6, r6, 48
893; P8BE-NEXT:    addze r8, r9
894; P8BE-NEXT:    mtvsrd v2, r6
895; P8BE-NEXT:    slwi r6, r8, 3
896; P8BE-NEXT:    sub r4, r4, r6
897; P8BE-NEXT:    sldi r4, r4, 48
898; P8BE-NEXT:    sub r3, r5, r3
899; P8BE-NEXT:    sldi r5, r7, 48
900; P8BE-NEXT:    mtvsrd v5, r4
901; P8BE-NEXT:    sldi r3, r3, 48
902; P8BE-NEXT:    mtvsrd v3, r5
903; P8BE-NEXT:    mtvsrd v4, r3
904; P8BE-NEXT:    vmrghh v2, v3, v2
905; P8BE-NEXT:    vmrghh v3, v5, v4
906; P8BE-NEXT:    vmrghw v2, v2, v3
907; P8BE-NEXT:    blr
; IR under test: three power-of-two divisors followed by one non-power-of-two divisor.
908  %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
909  ret <4 x i16> %1
910}
911
912; Don't fold if the divisor is one.
913define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
914; P9LE-LABEL: dont_fold_srem_one:
915; P9LE:       # %bb.0:
916; P9LE-NEXT:    li r3, 2
917; P9LE-NEXT:    lis r4, -14230
918; P9LE-NEXT:    vextuhrx r3, r3, v2
919; P9LE-NEXT:    ori r4, r4, 30865
920; P9LE-NEXT:    extsh r3, r3
921; P9LE-NEXT:    mulhw r4, r3, r4
922; P9LE-NEXT:    add r4, r4, r3
923; P9LE-NEXT:    srwi r5, r4, 31
924; P9LE-NEXT:    srawi r4, r4, 9
925; P9LE-NEXT:    add r4, r4, r5
926; P9LE-NEXT:    mulli r4, r4, 654
927; P9LE-NEXT:    sub r3, r3, r4
928; P9LE-NEXT:    lis r4, -19946
929; P9LE-NEXT:    mtvsrd v3, r3
930; P9LE-NEXT:    li r3, 0
931; P9LE-NEXT:    ori r4, r4, 17097
932; P9LE-NEXT:    mtvsrd v4, r3
933; P9LE-NEXT:    li r3, 4
934; P9LE-NEXT:    vextuhrx r3, r3, v2
935; P9LE-NEXT:    vmrghh v3, v3, v4
936; P9LE-NEXT:    extsh r3, r3
937; P9LE-NEXT:    mulhw r4, r3, r4
938; P9LE-NEXT:    add r4, r4, r3
939; P9LE-NEXT:    srwi r5, r4, 31
940; P9LE-NEXT:    srawi r4, r4, 4
941; P9LE-NEXT:    add r4, r4, r5
942; P9LE-NEXT:    mulli r4, r4, 23
943; P9LE-NEXT:    sub r3, r3, r4
944; P9LE-NEXT:    lis r4, 24749
945; P9LE-NEXT:    mtvsrd v4, r3
946; P9LE-NEXT:    li r3, 6
947; P9LE-NEXT:    ori r4, r4, 47143
948; P9LE-NEXT:    vextuhrx r3, r3, v2
949; P9LE-NEXT:    extsh r3, r3
950; P9LE-NEXT:    mulhw r4, r3, r4
951; P9LE-NEXT:    srwi r5, r4, 31
952; P9LE-NEXT:    srawi r4, r4, 11
953; P9LE-NEXT:    add r4, r4, r5
954; P9LE-NEXT:    mulli r4, r4, 5423
955; P9LE-NEXT:    sub r3, r3, r4
956; P9LE-NEXT:    mtvsrd v2, r3
957; P9LE-NEXT:    vmrghh v2, v2, v4
958; P9LE-NEXT:    vmrglw v2, v2, v3
959; P9LE-NEXT:    blr
960;
961; P9BE-LABEL: dont_fold_srem_one:
962; P9BE:       # %bb.0:
963; P9BE-NEXT:    li r3, 4
964; P9BE-NEXT:    lis r4, -19946
965; P9BE-NEXT:    vextuhlx r3, r3, v2
966; P9BE-NEXT:    ori r4, r4, 17097
967; P9BE-NEXT:    extsh r3, r3
968; P9BE-NEXT:    mulhw r4, r3, r4
969; P9BE-NEXT:    add r4, r4, r3
970; P9BE-NEXT:    srwi r5, r4, 31
971; P9BE-NEXT:    srawi r4, r4, 4
972; P9BE-NEXT:    add r4, r4, r5
973; P9BE-NEXT:    mulli r4, r4, 23
974; P9BE-NEXT:    sub r3, r3, r4
975; P9BE-NEXT:    lis r4, 24749
976; P9BE-NEXT:    sldi r3, r3, 48
977; P9BE-NEXT:    ori r4, r4, 47143
978; P9BE-NEXT:    mtvsrd v3, r3
979; P9BE-NEXT:    li r3, 6
980; P9BE-NEXT:    vextuhlx r3, r3, v2
981; P9BE-NEXT:    extsh r3, r3
982; P9BE-NEXT:    mulhw r4, r3, r4
983; P9BE-NEXT:    srwi r5, r4, 31
984; P9BE-NEXT:    srawi r4, r4, 11
985; P9BE-NEXT:    add r4, r4, r5
986; P9BE-NEXT:    mulli r4, r4, 5423
987; P9BE-NEXT:    sub r3, r3, r4
988; P9BE-NEXT:    lis r4, -14230
989; P9BE-NEXT:    sldi r3, r3, 48
990; P9BE-NEXT:    ori r4, r4, 30865
991; P9BE-NEXT:    mtvsrd v4, r3
992; P9BE-NEXT:    li r3, 2
993; P9BE-NEXT:    vextuhlx r3, r3, v2
994; P9BE-NEXT:    vmrghh v3, v3, v4
995; P9BE-NEXT:    extsh r3, r3
996; P9BE-NEXT:    mulhw r4, r3, r4
997; P9BE-NEXT:    add r4, r4, r3
998; P9BE-NEXT:    srwi r5, r4, 31
999; P9BE-NEXT:    srawi r4, r4, 9
1000; P9BE-NEXT:    add r4, r4, r5
1001; P9BE-NEXT:    mulli r4, r4, 654
1002; P9BE-NEXT:    sub r3, r3, r4
1003; P9BE-NEXT:    sldi r3, r3, 48
1004; P9BE-NEXT:    mtvsrd v2, r3
1005; P9BE-NEXT:    li r3, 0
1006; P9BE-NEXT:    sldi r3, r3, 48
1007; P9BE-NEXT:    mtvsrd v4, r3
1008; P9BE-NEXT:    vmrghh v2, v4, v2
1009; P9BE-NEXT:    vmrghw v2, v2, v3
1010; P9BE-NEXT:    blr
1011;
1012; P8LE-LABEL: dont_fold_srem_one:
1013; P8LE:       # %bb.0:
1014; P8LE-NEXT:    xxswapd vs0, v2
1015; P8LE-NEXT:    lis r5, 24749
1016; P8LE-NEXT:    lis r6, -19946
1017; P8LE-NEXT:    lis r8, -14230
1018; P8LE-NEXT:    ori r5, r5, 47143
1019; P8LE-NEXT:    ori r6, r6, 17097
1020; P8LE-NEXT:    ori r8, r8, 30865
1021; P8LE-NEXT:    mffprd r3, f0
1022; P8LE-NEXT:    rldicl r4, r3, 16, 48
1023; P8LE-NEXT:    rldicl r7, r3, 32, 48
1024; P8LE-NEXT:    rldicl r3, r3, 48, 48
1025; P8LE-NEXT:    extsh r4, r4
1026; P8LE-NEXT:    extsh r7, r7
1027; P8LE-NEXT:    extsh r3, r3
1028; P8LE-NEXT:    mulhw r5, r4, r5
1029; P8LE-NEXT:    mulhw r6, r7, r6
1030; P8LE-NEXT:    mulhw r8, r3, r8
1031; P8LE-NEXT:    srwi r9, r5, 31
1032; P8LE-NEXT:    srawi r5, r5, 11
1033; P8LE-NEXT:    add r6, r6, r7
1034; P8LE-NEXT:    add r8, r8, r3
1035; P8LE-NEXT:    add r5, r5, r9
1036; P8LE-NEXT:    srwi r9, r6, 31
1037; P8LE-NEXT:    srawi r6, r6, 4
1038; P8LE-NEXT:    add r6, r6, r9
1039; P8LE-NEXT:    srwi r9, r8, 31
1040; P8LE-NEXT:    srawi r8, r8, 9
1041; P8LE-NEXT:    mulli r5, r5, 5423
1042; P8LE-NEXT:    add r8, r8, r9
1043; P8LE-NEXT:    mulli r6, r6, 23
1044; P8LE-NEXT:    li r9, 0
1045; P8LE-NEXT:    mulli r8, r8, 654
1046; P8LE-NEXT:    mtvsrd v2, r9
1047; P8LE-NEXT:    sub r4, r4, r5
1048; P8LE-NEXT:    sub r5, r7, r6
1049; P8LE-NEXT:    mtvsrd v3, r4
1050; P8LE-NEXT:    sub r3, r3, r8
1051; P8LE-NEXT:    mtvsrd v4, r5
1052; P8LE-NEXT:    mtvsrd v5, r3
1053; P8LE-NEXT:    vmrghh v3, v3, v4
1054; P8LE-NEXT:    vmrghh v2, v5, v2
1055; P8LE-NEXT:    vmrglw v2, v3, v2
1056; P8LE-NEXT:    blr
1057;
1058; P8BE-LABEL: dont_fold_srem_one:
1059; P8BE:       # %bb.0:
1060; P8BE-NEXT:    mfvsrd r3, v2
1061; P8BE-NEXT:    lis r5, 24749
1062; P8BE-NEXT:    lis r6, -19946
1063; P8BE-NEXT:    lis r8, -14230
1064; P8BE-NEXT:    ori r5, r5, 47143
1065; P8BE-NEXT:    ori r6, r6, 17097
1066; P8BE-NEXT:    ori r8, r8, 30865
1067; P8BE-NEXT:    clrldi r4, r3, 48
1068; P8BE-NEXT:    rldicl r7, r3, 48, 48
1069; P8BE-NEXT:    rldicl r3, r3, 32, 48
1070; P8BE-NEXT:    extsh r4, r4
1071; P8BE-NEXT:    extsh r7, r7
1072; P8BE-NEXT:    extsh r3, r3
1073; P8BE-NEXT:    mulhw r5, r4, r5
1074; P8BE-NEXT:    mulhw r6, r7, r6
1075; P8BE-NEXT:    mulhw r8, r3, r8
1076; P8BE-NEXT:    srwi r9, r5, 31
1077; P8BE-NEXT:    srawi r5, r5, 11
1078; P8BE-NEXT:    add r6, r6, r7
1079; P8BE-NEXT:    add r8, r8, r3
1080; P8BE-NEXT:    add r5, r5, r9
1081; P8BE-NEXT:    srwi r9, r6, 31
1082; P8BE-NEXT:    srawi r6, r6, 4
1083; P8BE-NEXT:    add r6, r6, r9
1084; P8BE-NEXT:    srwi r9, r8, 31
1085; P8BE-NEXT:    srawi r8, r8, 9
1086; P8BE-NEXT:    mulli r5, r5, 5423
1087; P8BE-NEXT:    add r8, r8, r9
1088; P8BE-NEXT:    mulli r6, r6, 23
1089; P8BE-NEXT:    li r9, 0
1090; P8BE-NEXT:    mulli r8, r8, 654
1091; P8BE-NEXT:    sub r4, r4, r5
1092; P8BE-NEXT:    sldi r5, r9, 48
1093; P8BE-NEXT:    mtvsrd v2, r5
1094; P8BE-NEXT:    sub r5, r7, r6
1095; P8BE-NEXT:    sldi r4, r4, 48
1096; P8BE-NEXT:    sub r3, r3, r8
1097; P8BE-NEXT:    mtvsrd v3, r4
1098; P8BE-NEXT:    sldi r4, r5, 48
1099; P8BE-NEXT:    sldi r3, r3, 48
1100; P8BE-NEXT:    mtvsrd v4, r4
1101; P8BE-NEXT:    mtvsrd v5, r3
1102; P8BE-NEXT:    vmrghh v3, v4, v3
1103; P8BE-NEXT:    vmrghh v2, v2, v5
1104; P8BE-NEXT:    vmrghw v2, v2, v3
1105; P8BE-NEXT:    blr
; Per-lane signed remainder of %x by the constants <1, 654, 23, 5423>.
; In all four expected sequences above (P9LE, P9BE, P8LE, P8BE) no divide
; instruction appears: the 654, 23 and 5423 lanes each use a mulhw-based
; multiply/shift sequence followed by `mulli` by the divisor and `sub`, and
; the remaining lane (divisor 1, remainder always 0) is supplied by a plain
; `li rN, 0`.
1106  %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
1107  ret <4 x i16> %1
1108}
1109
1110; Don't fold if the divisor is 2^15 (written in the IR as i16 32768, i.e. bit pattern 0x8000).
1111define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
1112; P9LE-LABEL: dont_fold_urem_i16_smax:
1113; P9LE:       # %bb.0:
1114; P9LE-NEXT:    li r3, 4
1115; P9LE-NEXT:    lis r4, -19946
1116; P9LE-NEXT:    vextuhrx r3, r3, v2
1117; P9LE-NEXT:    ori r4, r4, 17097
1118; P9LE-NEXT:    extsh r3, r3
1119; P9LE-NEXT:    mulhw r4, r3, r4
1120; P9LE-NEXT:    add r4, r4, r3
1121; P9LE-NEXT:    srwi r5, r4, 31
1122; P9LE-NEXT:    srawi r4, r4, 4
1123; P9LE-NEXT:    add r4, r4, r5
1124; P9LE-NEXT:    mulli r4, r4, 23
1125; P9LE-NEXT:    sub r3, r3, r4
1126; P9LE-NEXT:    lis r4, 24749
1127; P9LE-NEXT:    mtvsrd v3, r3
1128; P9LE-NEXT:    li r3, 6
1129; P9LE-NEXT:    ori r4, r4, 47143
1130; P9LE-NEXT:    vextuhrx r3, r3, v2
1131; P9LE-NEXT:    extsh r3, r3
1132; P9LE-NEXT:    mulhw r4, r3, r4
1133; P9LE-NEXT:    srwi r5, r4, 31
1134; P9LE-NEXT:    srawi r4, r4, 11
1135; P9LE-NEXT:    add r4, r4, r5
1136; P9LE-NEXT:    mulli r4, r4, 5423
1137; P9LE-NEXT:    sub r3, r3, r4
1138; P9LE-NEXT:    mtvsrd v4, r3
1139; P9LE-NEXT:    li r3, 2
1140; P9LE-NEXT:    vextuhrx r3, r3, v2
1141; P9LE-NEXT:    vmrghh v3, v4, v3
1142; P9LE-NEXT:    extsh r3, r3
1143; P9LE-NEXT:    srawi r4, r3, 15
1144; P9LE-NEXT:    addze r4, r4
1145; P9LE-NEXT:    slwi r4, r4, 15
1146; P9LE-NEXT:    sub r3, r3, r4
1147; P9LE-NEXT:    mtvsrd v2, r3
1148; P9LE-NEXT:    li r3, 0
1149; P9LE-NEXT:    mtvsrd v4, r3
1150; P9LE-NEXT:    vmrghh v2, v2, v4
1151; P9LE-NEXT:    vmrglw v2, v3, v2
1152; P9LE-NEXT:    blr
1153;
1154; P9BE-LABEL: dont_fold_urem_i16_smax:
1155; P9BE:       # %bb.0:
1156; P9BE-NEXT:    li r3, 4
1157; P9BE-NEXT:    lis r4, -19946
1158; P9BE-NEXT:    vextuhlx r3, r3, v2
1159; P9BE-NEXT:    ori r4, r4, 17097
1160; P9BE-NEXT:    extsh r3, r3
1161; P9BE-NEXT:    mulhw r4, r3, r4
1162; P9BE-NEXT:    add r4, r4, r3
1163; P9BE-NEXT:    srwi r5, r4, 31
1164; P9BE-NEXT:    srawi r4, r4, 4
1165; P9BE-NEXT:    add r4, r4, r5
1166; P9BE-NEXT:    mulli r4, r4, 23
1167; P9BE-NEXT:    sub r3, r3, r4
1168; P9BE-NEXT:    lis r4, 24749
1169; P9BE-NEXT:    sldi r3, r3, 48
1170; P9BE-NEXT:    ori r4, r4, 47143
1171; P9BE-NEXT:    mtvsrd v3, r3
1172; P9BE-NEXT:    li r3, 6
1173; P9BE-NEXT:    vextuhlx r3, r3, v2
1174; P9BE-NEXT:    extsh r3, r3
1175; P9BE-NEXT:    mulhw r4, r3, r4
1176; P9BE-NEXT:    srwi r5, r4, 31
1177; P9BE-NEXT:    srawi r4, r4, 11
1178; P9BE-NEXT:    add r4, r4, r5
1179; P9BE-NEXT:    mulli r4, r4, 5423
1180; P9BE-NEXT:    sub r3, r3, r4
1181; P9BE-NEXT:    sldi r3, r3, 48
1182; P9BE-NEXT:    mtvsrd v4, r3
1183; P9BE-NEXT:    li r3, 2
1184; P9BE-NEXT:    vextuhlx r3, r3, v2
1185; P9BE-NEXT:    vmrghh v3, v3, v4
1186; P9BE-NEXT:    extsh r3, r3
1187; P9BE-NEXT:    srawi r4, r3, 15
1188; P9BE-NEXT:    addze r4, r4
1189; P9BE-NEXT:    slwi r4, r4, 15
1190; P9BE-NEXT:    sub r3, r3, r4
1191; P9BE-NEXT:    sldi r3, r3, 48
1192; P9BE-NEXT:    mtvsrd v2, r3
1193; P9BE-NEXT:    li r3, 0
1194; P9BE-NEXT:    sldi r3, r3, 48
1195; P9BE-NEXT:    mtvsrd v4, r3
1196; P9BE-NEXT:    vmrghh v2, v4, v2
1197; P9BE-NEXT:    vmrghw v2, v2, v3
1198; P9BE-NEXT:    blr
1199;
1200; P8LE-LABEL: dont_fold_urem_i16_smax:
1201; P8LE:       # %bb.0:
1202; P8LE-NEXT:    xxswapd vs0, v2
1203; P8LE-NEXT:    lis r4, 24749
1204; P8LE-NEXT:    lis r5, -19946
1205; P8LE-NEXT:    ori r4, r4, 47143
1206; P8LE-NEXT:    ori r5, r5, 17097
1207; P8LE-NEXT:    mffprd r3, f0
1208; P8LE-NEXT:    rldicl r6, r3, 16, 48
1209; P8LE-NEXT:    rldicl r7, r3, 32, 48
1210; P8LE-NEXT:    extsh r6, r6
1211; P8LE-NEXT:    extsh r7, r7
1212; P8LE-NEXT:    mulhw r4, r6, r4
1213; P8LE-NEXT:    mulhw r5, r7, r5
1214; P8LE-NEXT:    rldicl r3, r3, 48, 48
1215; P8LE-NEXT:    extsh r3, r3
1216; P8LE-NEXT:    srwi r8, r4, 31
1217; P8LE-NEXT:    srawi r4, r4, 11
1218; P8LE-NEXT:    add r5, r5, r7
1219; P8LE-NEXT:    add r4, r4, r8
1220; P8LE-NEXT:    srwi r8, r5, 31
1221; P8LE-NEXT:    srawi r5, r5, 4
1222; P8LE-NEXT:    mulli r4, r4, 5423
1223; P8LE-NEXT:    add r5, r5, r8
1224; P8LE-NEXT:    srawi r9, r3, 15
1225; P8LE-NEXT:    li r8, 0
1226; P8LE-NEXT:    mulli r5, r5, 23
1227; P8LE-NEXT:    mtvsrd v2, r8
1228; P8LE-NEXT:    sub r4, r6, r4
1229; P8LE-NEXT:    addze r6, r9
1230; P8LE-NEXT:    slwi r6, r6, 15
1231; P8LE-NEXT:    mtvsrd v3, r4
1232; P8LE-NEXT:    sub r5, r7, r5
1233; P8LE-NEXT:    sub r3, r3, r6
1234; P8LE-NEXT:    mtvsrd v4, r5
1235; P8LE-NEXT:    mtvsrd v5, r3
1236; P8LE-NEXT:    vmrghh v3, v3, v4
1237; P8LE-NEXT:    vmrghh v2, v5, v2
1238; P8LE-NEXT:    vmrglw v2, v3, v2
1239; P8LE-NEXT:    blr
1240;
1241; P8BE-LABEL: dont_fold_urem_i16_smax:
1242; P8BE:       # %bb.0:
1243; P8BE-NEXT:    mfvsrd r3, v2
1244; P8BE-NEXT:    lis r4, 24749
1245; P8BE-NEXT:    lis r5, -19946
1246; P8BE-NEXT:    ori r4, r4, 47143
1247; P8BE-NEXT:    ori r5, r5, 17097
1248; P8BE-NEXT:    clrldi r6, r3, 48
1249; P8BE-NEXT:    rldicl r7, r3, 48, 48
1250; P8BE-NEXT:    extsh r6, r6
1251; P8BE-NEXT:    extsh r7, r7
1252; P8BE-NEXT:    mulhw r4, r6, r4
1253; P8BE-NEXT:    mulhw r5, r7, r5
1254; P8BE-NEXT:    rldicl r3, r3, 32, 48
1255; P8BE-NEXT:    extsh r3, r3
1256; P8BE-NEXT:    srwi r8, r4, 31
1257; P8BE-NEXT:    srawi r4, r4, 11
1258; P8BE-NEXT:    add r5, r5, r7
1259; P8BE-NEXT:    add r4, r4, r8
1260; P8BE-NEXT:    srwi r8, r5, 31
1261; P8BE-NEXT:    srawi r5, r5, 4
1262; P8BE-NEXT:    mulli r4, r4, 5423
1263; P8BE-NEXT:    add r5, r5, r8
1264; P8BE-NEXT:    li r8, 0
1265; P8BE-NEXT:    mulli r5, r5, 23
1266; P8BE-NEXT:    srawi r9, r3, 15
1267; P8BE-NEXT:    sub r4, r6, r4
1268; P8BE-NEXT:    sldi r6, r8, 48
1269; P8BE-NEXT:    addze r8, r9
1270; P8BE-NEXT:    mtvsrd v2, r6
1271; P8BE-NEXT:    slwi r6, r8, 15
1272; P8BE-NEXT:    sldi r4, r4, 48
1273; P8BE-NEXT:    sub r5, r7, r5
1274; P8BE-NEXT:    sub r3, r3, r6
1275; P8BE-NEXT:    mtvsrd v3, r4
1276; P8BE-NEXT:    sldi r4, r5, 48
1277; P8BE-NEXT:    sldi r3, r3, 48
1278; P8BE-NEXT:    mtvsrd v4, r4
1279; P8BE-NEXT:    mtvsrd v5, r3
1280; P8BE-NEXT:    vmrghh v3, v4, v3
1281; P8BE-NEXT:    vmrghh v2, v2, v5
1282; P8BE-NEXT:    vmrghw v2, v2, v3
1283; P8BE-NEXT:    blr
; Per-lane signed remainder of %x by the constants <1, 32768, 23, 5423>.
; NOTE(review): the function name says "urem" and "smax", but the operation
; below is `srem` and 32768 is one more than the largest signed i16 value
; (32767). The name is left as-is because the -LABEL lines above match on it.
; Expected code in all four configurations: the 32768 lane is reduced with
; `srawi ..., 15` / `addze` / `slwi ..., 15` / `sub`, the 23 and 5423 lanes use
; a mulhw-based sequence ending in `mulli` by the divisor and `sub`, and the
; divisor-1 lane is supplied by a plain `li rN, 0`.
1284  %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1285  ret <4 x i16> %1
1286}
1287
1288; Don't fold i64 srem.
1289define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
1290; P9LE-LABEL: dont_fold_srem_i64:
1291; P9LE:       # %bb.0:
1292; P9LE-NEXT:    lis r4, 24749
1293; P9LE-NEXT:    mfvsrd r3, v3
1294; P9LE-NEXT:    ori r4, r4, 47142
1295; P9LE-NEXT:    sldi r4, r4, 32
1296; P9LE-NEXT:    oris r4, r4, 58853
1297; P9LE-NEXT:    ori r4, r4, 6055
1298; P9LE-NEXT:    mulhd r4, r3, r4
1299; P9LE-NEXT:    rldicl r5, r4, 1, 63
1300; P9LE-NEXT:    sradi r4, r4, 11
1301; P9LE-NEXT:    add r4, r4, r5
1302; P9LE-NEXT:    lis r5, -19946
1303; P9LE-NEXT:    mulli r4, r4, 5423
1304; P9LE-NEXT:    ori r5, r5, 17096
1305; P9LE-NEXT:    sldi r5, r5, 32
1306; P9LE-NEXT:    oris r5, r5, 22795
1307; P9LE-NEXT:    sub r3, r3, r4
1308; P9LE-NEXT:    mfvsrld r4, v3
1309; P9LE-NEXT:    ori r5, r5, 8549
1310; P9LE-NEXT:    mulhd r5, r4, r5
1311; P9LE-NEXT:    add r5, r5, r4
1312; P9LE-NEXT:    rldicl r6, r5, 1, 63
1313; P9LE-NEXT:    sradi r5, r5, 4
1314; P9LE-NEXT:    add r5, r5, r6
1315; P9LE-NEXT:    mulli r5, r5, 23
1316; P9LE-NEXT:    sub r4, r4, r5
1317; P9LE-NEXT:    mtvsrdd v3, r3, r4
1318; P9LE-NEXT:    lis r4, 25653
1319; P9LE-NEXT:    mfvsrd r3, v2
1320; P9LE-NEXT:    ori r4, r4, 15432
1321; P9LE-NEXT:    sldi r4, r4, 32
1322; P9LE-NEXT:    oris r4, r4, 1603
1323; P9LE-NEXT:    ori r4, r4, 21445
1324; P9LE-NEXT:    mulhd r4, r3, r4
1325; P9LE-NEXT:    rldicl r5, r4, 1, 63
1326; P9LE-NEXT:    sradi r4, r4, 8
1327; P9LE-NEXT:    add r4, r4, r5
1328; P9LE-NEXT:    mulli r4, r4, 654
1329; P9LE-NEXT:    sub r3, r3, r4
1330; P9LE-NEXT:    li r4, 0
1331; P9LE-NEXT:    mtvsrdd v2, r3, r4
1332; P9LE-NEXT:    blr
1333;
1334; P9BE-LABEL: dont_fold_srem_i64:
1335; P9BE:       # %bb.0:
1336; P9BE-NEXT:    lis r4, 24749
1337; P9BE-NEXT:    mfvsrld r3, v3
1338; P9BE-NEXT:    ori r4, r4, 47142
1339; P9BE-NEXT:    sldi r4, r4, 32
1340; P9BE-NEXT:    oris r4, r4, 58853
1341; P9BE-NEXT:    ori r4, r4, 6055
1342; P9BE-NEXT:    mulhd r4, r3, r4
1343; P9BE-NEXT:    rldicl r5, r4, 1, 63
1344; P9BE-NEXT:    sradi r4, r4, 11
1345; P9BE-NEXT:    add r4, r4, r5
1346; P9BE-NEXT:    lis r5, -19946
1347; P9BE-NEXT:    ori r5, r5, 17096
1348; P9BE-NEXT:    mulli r4, r4, 5423
1349; P9BE-NEXT:    sldi r5, r5, 32
1350; P9BE-NEXT:    oris r5, r5, 22795
1351; P9BE-NEXT:    sub r3, r3, r4
1352; P9BE-NEXT:    mfvsrd r4, v3
1353; P9BE-NEXT:    ori r5, r5, 8549
1354; P9BE-NEXT:    mulhd r5, r4, r5
1355; P9BE-NEXT:    add r5, r5, r4
1356; P9BE-NEXT:    rldicl r6, r5, 1, 63
1357; P9BE-NEXT:    sradi r5, r5, 4
1358; P9BE-NEXT:    add r5, r5, r6
1359; P9BE-NEXT:    mulli r5, r5, 23
1360; P9BE-NEXT:    sub r4, r4, r5
1361; P9BE-NEXT:    mtvsrdd v3, r4, r3
1362; P9BE-NEXT:    lis r4, 25653
1363; P9BE-NEXT:    mfvsrld r3, v2
1364; P9BE-NEXT:    ori r4, r4, 15432
1365; P9BE-NEXT:    sldi r4, r4, 32
1366; P9BE-NEXT:    oris r4, r4, 1603
1367; P9BE-NEXT:    ori r4, r4, 21445
1368; P9BE-NEXT:    mulhd r4, r3, r4
1369; P9BE-NEXT:    rldicl r5, r4, 1, 63
1370; P9BE-NEXT:    sradi r4, r4, 8
1371; P9BE-NEXT:    add r4, r4, r5
1372; P9BE-NEXT:    mulli r4, r4, 654
1373; P9BE-NEXT:    sub r3, r3, r4
1374; P9BE-NEXT:    mtvsrdd v2, 0, r3
1375; P9BE-NEXT:    blr
1376;
1377; P8LE-LABEL: dont_fold_srem_i64:
1378; P8LE:       # %bb.0:
1379; P8LE-NEXT:    lis r3, 24749
1380; P8LE-NEXT:    lis r4, -19946
1381; P8LE-NEXT:    lis r5, 25653
1382; P8LE-NEXT:    xxswapd vs0, v3
1383; P8LE-NEXT:    mfvsrd r6, v3
1384; P8LE-NEXT:    ori r3, r3, 47142
1385; P8LE-NEXT:    ori r4, r4, 17096
1386; P8LE-NEXT:    ori r5, r5, 15432
1387; P8LE-NEXT:    mfvsrd r7, v2
1388; P8LE-NEXT:    sldi r3, r3, 32
1389; P8LE-NEXT:    sldi r4, r4, 32
1390; P8LE-NEXT:    sldi r5, r5, 32
1391; P8LE-NEXT:    oris r3, r3, 58853
1392; P8LE-NEXT:    oris r4, r4, 22795
1393; P8LE-NEXT:    mffprd r8, f0
1394; P8LE-NEXT:    oris r5, r5, 1603
1395; P8LE-NEXT:    ori r3, r3, 6055
1396; P8LE-NEXT:    ori r4, r4, 8549
1397; P8LE-NEXT:    ori r5, r5, 21445
1398; P8LE-NEXT:    mulhd r3, r6, r3
1399; P8LE-NEXT:    mulhd r5, r7, r5
1400; P8LE-NEXT:    mulhd r4, r8, r4
1401; P8LE-NEXT:    rldicl r9, r3, 1, 63
1402; P8LE-NEXT:    sradi r3, r3, 11
1403; P8LE-NEXT:    add r3, r3, r9
1404; P8LE-NEXT:    rldicl r9, r5, 1, 63
1405; P8LE-NEXT:    add r4, r4, r8
1406; P8LE-NEXT:    sradi r5, r5, 8
1407; P8LE-NEXT:    mulli r3, r3, 5423
1408; P8LE-NEXT:    add r5, r5, r9
1409; P8LE-NEXT:    rldicl r9, r4, 1, 63
1410; P8LE-NEXT:    sradi r4, r4, 4
1411; P8LE-NEXT:    mulli r5, r5, 654
1412; P8LE-NEXT:    add r4, r4, r9
1413; P8LE-NEXT:    mulli r4, r4, 23
1414; P8LE-NEXT:    sub r3, r6, r3
1415; P8LE-NEXT:    mtfprd f0, r3
1416; P8LE-NEXT:    sub r5, r7, r5
1417; P8LE-NEXT:    mtfprd f1, r5
1418; P8LE-NEXT:    sub r3, r8, r4
1419; P8LE-NEXT:    li r4, 0
1420; P8LE-NEXT:    mtfprd f2, r3
1421; P8LE-NEXT:    mtfprd f3, r4
1422; P8LE-NEXT:    xxmrghd v3, vs0, vs2
1423; P8LE-NEXT:    xxmrghd v2, vs1, vs3
1424; P8LE-NEXT:    blr
1425;
1426; P8BE-LABEL: dont_fold_srem_i64:
1427; P8BE:       # %bb.0:
1428; P8BE-NEXT:    lis r4, -19946
1429; P8BE-NEXT:    lis r3, 24749
1430; P8BE-NEXT:    xxswapd vs0, v3
1431; P8BE-NEXT:    lis r5, 25653
1432; P8BE-NEXT:    xxswapd vs1, v2
1433; P8BE-NEXT:    ori r4, r4, 17096
1434; P8BE-NEXT:    ori r3, r3, 47142
1435; P8BE-NEXT:    ori r5, r5, 15432
1436; P8BE-NEXT:    mfvsrd r6, v3
1437; P8BE-NEXT:    sldi r4, r4, 32
1438; P8BE-NEXT:    sldi r3, r3, 32
1439; P8BE-NEXT:    oris r4, r4, 22795
1440; P8BE-NEXT:    sldi r5, r5, 32
1441; P8BE-NEXT:    oris r3, r3, 58853
1442; P8BE-NEXT:    mffprd r7, f0
1443; P8BE-NEXT:    ori r4, r4, 8549
1444; P8BE-NEXT:    ori r3, r3, 6055
1445; P8BE-NEXT:    oris r5, r5, 1603
1446; P8BE-NEXT:    mffprd r8, f1
1447; P8BE-NEXT:    mulhd r4, r6, r4
1448; P8BE-NEXT:    mulhd r3, r7, r3
1449; P8BE-NEXT:    ori r5, r5, 21445
1450; P8BE-NEXT:    mulhd r5, r8, r5
1451; P8BE-NEXT:    add r4, r4, r6
1452; P8BE-NEXT:    rldicl r9, r3, 1, 63
1453; P8BE-NEXT:    sradi r3, r3, 11
1454; P8BE-NEXT:    rldicl r10, r4, 1, 63
1455; P8BE-NEXT:    sradi r4, r4, 4
1456; P8BE-NEXT:    add r3, r3, r9
1457; P8BE-NEXT:    rldicl r9, r5, 1, 63
1458; P8BE-NEXT:    add r4, r4, r10
1459; P8BE-NEXT:    sradi r5, r5, 8
1460; P8BE-NEXT:    mulli r3, r3, 5423
1461; P8BE-NEXT:    add r5, r5, r9
1462; P8BE-NEXT:    mulli r4, r4, 23
1463; P8BE-NEXT:    mulli r5, r5, 654
1464; P8BE-NEXT:    sub r3, r7, r3
1465; P8BE-NEXT:    sub r4, r6, r4
1466; P8BE-NEXT:    mtfprd f0, r3
1467; P8BE-NEXT:    sub r3, r8, r5
1468; P8BE-NEXT:    mtfprd f1, r4
1469; P8BE-NEXT:    li r4, 0
1470; P8BE-NEXT:    mtfprd f2, r3
1471; P8BE-NEXT:    mtfprd f3, r4
1472; P8BE-NEXT:    xxmrghd v3, vs1, vs0
1473; P8BE-NEXT:    xxmrghd v2, vs3, vs2
1474; P8BE-NEXT:    blr
; Per-lane signed remainder of a <4 x i64> %x by the constants
; <1, 654, 23, 5423>. The expected code above works on 64-bit scalars: each
; of the 654, 23 and 5423 lanes builds a 64-bit constant with
; lis/ori/sldi/oris/ori, takes `mulhd` with it, adjusts with
; rldicl/sradi/add, then does `mulli` by the divisor and `sub`. No divide
; instruction appears. The divisor-1 lane is a literal zero: `li r4, 0` on
; P9LE/P8LE/P8BE, and presumably the `0` operand of `mtvsrdd v2, 0, r3` on
; P9BE.
1475  %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
1476  ret <4 x i64> %1
1477}
1478