; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=ALL

; If the target does not have a single div/rem operation,
; -div-rem-pairs pass will decompose the remainder calculation as:
; X % Y --> X - ((X / Y) * Y)
; But if the target does have a single div/rem operation,
; the opposite transform is likely beneficial.

define i8 @scalar_i8(i8 %x, i8 %y, i8* %divdst) nounwind {
; ALL-LABEL: scalar_i8:
; ALL:       // %bb.0:
; ALL-NEXT:    and w8, w1, #0xff
; ALL-NEXT:    and w9, w0, #0xff
; ALL-NEXT:    udiv w8, w9, w8
; ALL-NEXT:    msub w0, w8, w1, w0
; ALL-NEXT:    strb w8, [x2]
; ALL-NEXT:    ret
  %div = udiv i8 %x, %y
  store i8 %div, i8* %divdst, align 4
  %t1 = mul i8 %div, %y
  %t2 = sub i8 %x, %t1
  ret i8 %t2
}

define i16 @scalar_i16(i16 %x, i16 %y, i16* %divdst) nounwind {
; ALL-LABEL: scalar_i16:
; ALL:       // %bb.0:
; ALL-NEXT:    and w8, w1, #0xffff
; ALL-NEXT:    and w9, w0, #0xffff
; ALL-NEXT:    udiv w8, w9, w8
; ALL-NEXT:    msub w0, w8, w1, w0
; ALL-NEXT:    strh w8, [x2]
; ALL-NEXT:    ret
  %div = udiv i16 %x, %y
  store i16 %div, i16* %divdst, align 4
  %t1 = mul i16 %div, %y
  %t2 = sub i16 %x, %t1
  ret i16 %t2
}

define i32 @scalar_i32(i32 %x, i32 %y, i32* %divdst) nounwind {
; ALL-LABEL: scalar_i32:
; ALL:       // %bb.0:
; ALL-NEXT:    udiv w8, w0, w1
; ALL-NEXT:    msub w0, w8, w1, w0
; ALL-NEXT:    str w8, [x2]
; ALL-NEXT:    ret
  %div = udiv i32 %x, %y
  store i32 %div, i32* %divdst, align 4
  %t1 = mul i32 %div, %y
  %t2 = sub i32 %x, %t1
  ret i32 %t2
}

define i64 @scalar_i64(i64 %x, i64 %y, i64* %divdst) nounwind {
; ALL-LABEL: scalar_i64:
; ALL:       // %bb.0:
; ALL-NEXT:    udiv x8, x0, x1
; ALL-NEXT:    msub x0, x8, x1, x0
; ALL-NEXT:    str x8, [x2]
; ALL-NEXT:    ret
  %div = udiv i64 %x, %y
  store i64 %div, i64* %divdst, align 4
  %t1 = mul i64 %div, %y
  %t2 = sub i64 %x, %t1
  ret i64 %t2
}

define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst) nounwind {
; ALL-LABEL: vector_i128_i8:
; ALL:       // %bb.0:
; ALL-NEXT:    umov w10, v1.b[0]
; ALL-NEXT:    umov w11, v0.b[0]
; ALL-NEXT:    umov w8, v1.b[1]
; ALL-NEXT:    umov w9, v0.b[1]
; ALL-NEXT:    udiv w10, w11, w10
; ALL-NEXT:    umov w12, v1.b[2]
; ALL-NEXT:    umov w13, v0.b[2]
; ALL-NEXT:    udiv w8, w9, w8
; ALL-NEXT:    fmov s2, w10
; ALL-NEXT:    umov w14, v1.b[3]
; ALL-NEXT:    umov w15, v0.b[3]
; ALL-NEXT:    udiv w12, w13, w12
; ALL-NEXT:    mov v2.b[1], w8
; ALL-NEXT:    umov w16, v1.b[4]
; ALL-NEXT:    umov w17, v0.b[4]
; ALL-NEXT:    udiv w14, w15, w14
; ALL-NEXT:    mov v2.b[2], w12
; ALL-NEXT:    umov w18, v1.b[5]
; ALL-NEXT:    umov w1, v0.b[5]
; ALL-NEXT:    udiv w16, w17, w16
; ALL-NEXT:    mov v2.b[3], w14
; ALL-NEXT:    umov w2, v1.b[6]
; ALL-NEXT:    umov w3, v0.b[6]
; ALL-NEXT:    udiv w18, w1, w18
; ALL-NEXT:    mov v2.b[4], w16
; ALL-NEXT:    umov w4, v1.b[7]
; ALL-NEXT:    umov w5, v0.b[7]
; ALL-NEXT:    udiv w2, w3, w2
; ALL-NEXT:    mov v2.b[5], w18
; ALL-NEXT:    umov w9, v1.b[8]
; ALL-NEXT:    umov w11, v0.b[8]
; ALL-NEXT:    udiv w4, w5, w4
; ALL-NEXT:    mov v2.b[6], w2
; ALL-NEXT:    umov w13, v1.b[9]
; ALL-NEXT:    umov w15, v0.b[9]
; ALL-NEXT:    udiv w9, w11, w9
; ALL-NEXT:    mov v2.b[7], w4
; ALL-NEXT:    umov w17, v1.b[10]
; ALL-NEXT:    umov w1, v0.b[10]
; ALL-NEXT:    udiv w13, w15, w13
; ALL-NEXT:    mov v2.b[8], w9
; ALL-NEXT:    umov w3, v1.b[11]
; ALL-NEXT:    umov w5, v0.b[11]
; ALL-NEXT:    udiv w17, w1, w17
; ALL-NEXT:    mov v2.b[9], w13
; ALL-NEXT:    umov w11, v1.b[12]
; ALL-NEXT:    umov w15, v0.b[12]
; ALL-NEXT:    udiv w3, w5, w3
; ALL-NEXT:    mov v2.b[10], w17
; ALL-NEXT:    umov w1, v1.b[13]
; ALL-NEXT:    umov w5, v0.b[13]
; ALL-NEXT:    udiv w11, w15, w11
; ALL-NEXT:    mov v2.b[11], w3
; ALL-NEXT:    umov w15, v1.b[14]
; ALL-NEXT:    udiv w1, w5, w1
; ALL-NEXT:    umov w5, v0.b[14]
; ALL-NEXT:    mov v2.b[12], w11
; ALL-NEXT:    udiv w15, w5, w15
; ALL-NEXT:    umov w8, v1.b[15]
; ALL-NEXT:    mov v2.b[13], w1
; ALL-NEXT:    umov w9, v0.b[15]
; ALL-NEXT:    mov v2.b[14], w15
; ALL-NEXT:    udiv w8, w9, w8
; ALL-NEXT:    mov v2.b[15], w8
; ALL-NEXT:    mls v0.16b, v2.16b, v1.16b
; ALL-NEXT:    str q2, [x0]
; ALL-NEXT:    ret
  %div = udiv <16 x i8> %x, %y
  store <16 x i8> %div, <16 x i8>* %divdst, align 16
  %t1 = mul <16 x i8> %div, %y
  %t2 = sub <16 x i8> %x, %t1
  ret <16 x i8> %t2
}

define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, <8 x i16>* %divdst) nounwind {
; ALL-LABEL: vector_i128_i16:
; ALL:       // %bb.0:
; ALL-NEXT:    umov w10, v1.h[0]
; ALL-NEXT:    umov w11, v0.h[0]
; ALL-NEXT:    umov w8, v1.h[1]
; ALL-NEXT:    umov w9, v0.h[1]
; ALL-NEXT:    udiv w10, w11, w10
; ALL-NEXT:    umov w12, v1.h[2]
; ALL-NEXT:    umov w13, v0.h[2]
; ALL-NEXT:    udiv w8, w9, w8
; ALL-NEXT:    fmov s2, w10
; ALL-NEXT:    umov w14, v1.h[3]
; ALL-NEXT:    umov w15, v0.h[3]
; ALL-NEXT:    udiv w12, w13, w12
; ALL-NEXT:    mov v2.h[1], w8
; ALL-NEXT:    umov w9, v1.h[4]
; ALL-NEXT:    umov w11, v0.h[4]
; ALL-NEXT:    udiv w14, w15, w14
; ALL-NEXT:    mov v2.h[2], w12
; ALL-NEXT:    umov w13, v1.h[5]
; ALL-NEXT:    umov w15, v0.h[5]
; ALL-NEXT:    udiv w9, w11, w9
; ALL-NEXT:    mov v2.h[3], w14
; ALL-NEXT:    umov w11, v1.h[6]
; ALL-NEXT:    udiv w13, w15, w13
; ALL-NEXT:    umov w15, v0.h[6]
; ALL-NEXT:    mov v2.h[4], w9
; ALL-NEXT:    udiv w11, w15, w11
; ALL-NEXT:    umov w8, v1.h[7]
; ALL-NEXT:    mov v2.h[5], w13
; ALL-NEXT:    umov w9, v0.h[7]
; ALL-NEXT:    mov v2.h[6], w11
; ALL-NEXT:    udiv w8, w9, w8
; ALL-NEXT:    mov v2.h[7], w8
; ALL-NEXT:    mls v0.8h, v2.8h, v1.8h
; ALL-NEXT:    str q2, [x0]
; ALL-NEXT:    ret
  %div = udiv <8 x i16> %x, %y
  store <8 x i16> %div, <8 x i16>* %divdst, align 16
  %t1 = mul <8 x i16> %div, %y
  %t2 = sub <8 x i16> %x, %t1
  ret <8 x i16> %t2
}

define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst) nounwind {
; ALL-LABEL: vector_i128_i32:
; ALL:       // %bb.0:
; ALL-NEXT:    fmov w9, s1
; ALL-NEXT:    fmov w10, s0
; ALL-NEXT:    mov w8, v1.s[1]
; ALL-NEXT:    udiv w9, w10, w9
; ALL-NEXT:    mov w10, v0.s[1]
; ALL-NEXT:    udiv w8, w10, w8
; ALL-NEXT:    mov w10, v1.s[2]
; ALL-NEXT:    fmov s2, w9
; ALL-NEXT:    mov w9, v0.s[2]
; ALL-NEXT:    udiv w9, w9, w10
; ALL-NEXT:    mov w10, v1.s[3]
; ALL-NEXT:    mov v2.s[1], w8
; ALL-NEXT:    mov w8, v0.s[3]
; ALL-NEXT:    mov v2.s[2], w9
; ALL-NEXT:    udiv w8, w8, w10
; ALL-NEXT:    mov v2.s[3], w8
; ALL-NEXT:    mls v0.4s, v2.4s, v1.4s
; ALL-NEXT:    str q2, [x0]
; ALL-NEXT:    ret
  %div = udiv <4 x i32> %x, %y
  store <4 x i32> %div, <4 x i32>* %divdst, align 16
  %t1 = mul <4 x i32> %div, %y
  %t2 = sub <4 x i32> %x, %t1
  ret <4 x i32> %t2
}

define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst) nounwind {
; ALL-LABEL: vector_i128_i64:
; ALL:       // %bb.0:
; ALL-NEXT:    fmov x10, d1
; ALL-NEXT:    fmov x11, d0
; ALL-NEXT:    mov x8, v1.d[1]
; ALL-NEXT:    mov x9, v0.d[1]
; ALL-NEXT:    udiv x11, x11, x10
; ALL-NEXT:    udiv x9, x9, x8
; ALL-NEXT:    mul x10, x11, x10
; ALL-NEXT:    mul x8, x9, x8
; ALL-NEXT:    fmov d1, x10
; ALL-NEXT:    mov v1.d[1], x8
; ALL-NEXT:    sub v0.2d, v0.2d, v1.2d
; ALL-NEXT:    fmov d1, x11
; ALL-NEXT:    mov v1.d[1], x9
; ALL-NEXT:    str q1, [x0]
; ALL-NEXT:    ret
  %div = udiv <2 x i64> %x, %y
  store <2 x i64> %div, <2 x i64>* %divdst, align 16
  %t1 = mul <2 x i64> %div, %y
  %t2 = sub <2 x i64> %x, %t1
  ret <2 x i64> %t2
}

; Special tests.

define i32 @scalar_i32_commutative(i32 %x, i32* %ysrc, i32* %divdst) nounwind {
; ALL-LABEL: scalar_i32_commutative:
; ALL:       // %bb.0:
; ALL-NEXT:    ldr w8, [x1]
; ALL-NEXT:    udiv w9, w0, w8
; ALL-NEXT:    msub w0, w8, w9, w0
; ALL-NEXT:    str w9, [x2]
; ALL-NEXT:    ret
  %y = load i32, i32* %ysrc, align 4
  %div = udiv i32 %x, %y
  store i32 %div, i32* %divdst, align 4
  %t1 = mul i32 %y, %div ; commutative
  %t2 = sub i32 %x, %t1
  ret i32 %t2
}

; We do not care about extra uses.
define i32 @extrause(i32 %x, i32 %y, i32* %divdst, i32* %t1dst) nounwind {
; ALL-LABEL: extrause:
; ALL:       // %bb.0:
; ALL-NEXT:    udiv w8, w0, w1
; ALL-NEXT:    str w8, [x2]
; ALL-NEXT:    mul w8, w8, w1
; ALL-NEXT:    sub w0, w0, w8
; ALL-NEXT:    str w8, [x3]
; ALL-NEXT:    ret
  %div = udiv i32 %x, %y
  store i32 %div, i32* %divdst, align 4
  %t1 = mul i32 %div, %y
  store i32 %t1, i32* %t1dst, align 4
  %t2 = sub i32 %x, %t1
  ret i32 %t2
}

; 'rem' should appear next to 'div'.
define i32 @multiple_bb(i32 %x, i32 %y, i32* %divdst, i1 zeroext %store_urem, i32* %uremdst) nounwind {
; ALL-LABEL: multiple_bb:
; ALL:       // %bb.0:
; ALL-NEXT:    mov w8, w0
; ALL-NEXT:    udiv w0, w0, w1
; ALL-NEXT:    str w0, [x2]
; ALL-NEXT:    cbz w3, .LBB10_2
; ALL-NEXT:  // %bb.1: // %do_urem
; ALL-NEXT:    msub w8, w0, w1, w8
; ALL-NEXT:    str w8, [x4]
; ALL-NEXT:  .LBB10_2: // %end
; ALL-NEXT:    ret
  %div = udiv i32 %x, %y
  store i32 %div, i32* %divdst, align 4
  br i1 %store_urem, label %do_urem, label %end
do_urem:
  %t1 = mul i32 %div, %y
  %t2 = sub i32 %x, %t1
  store i32 %t2, i32* %uremdst, align 4
  br label %end
end:
  ret i32 %div
}

define i32 @negative_different_x(i32 %x0, i32 %x1, i32 %y, i32* %divdst) nounwind {
; ALL-LABEL: negative_different_x:
; ALL:       // %bb.0:
; ALL-NEXT:    udiv w8, w0, w2
; ALL-NEXT:    msub w0, w8, w2, w1
; ALL-NEXT:    str w8, [x3]
; ALL-NEXT:    ret
  %div = udiv i32 %x0, %y ; not %x1
  store i32 %div, i32* %divdst, align 4
  %t1 = mul i32 %div, %y
  %t2 = sub i32 %x1, %t1 ; not %x0
  ret i32 %t2
}