; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE

; urem of a <4 x i16> by four different constants (95, 124, 98, 1003), none of
; which is a power of two. For every configuration the expected output below
; handles each lane with a multiply-high (mulhwu), shifts, mulli and sub; no
; divide instruction appears.
define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; P9LE-LABEL: fold_urem_vec_1:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    lis r4, 21399
; P9LE-NEXT:    lis r5, 8456
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 33437
; P9LE-NEXT:    ori r5, r5, 16913
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    srwi r4, r4, 5
; P9LE-NEXT:    mulli r4, r4, 98
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, 16727
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    ori r4, r4, 2287
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    srwi r4, r4, 8
; P9LE-NEXT:    mulli r4, r4, 1003
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    clrlwi r4, r3, 16
; P9LE-NEXT:    rlwinm r3, r3, 30, 18, 31
; P9LE-NEXT:    mulhwu r3, r3, r5
; P9LE-NEXT:    srwi r3, r3, 2
; P9LE-NEXT:    mulli r3, r3, 124
; P9LE-NEXT:    sub r3, r4, r3
; P9LE-NEXT:    lis r4, 22765
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    ori r4, r4, 8969
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    sub r5, r3, r4
; P9LE-NEXT:    srwi r5, r5, 1
; P9LE-NEXT:    add r4, r5, r4
; P9LE-NEXT:    srwi r4, r4, 6
; P9LE-NEXT:    mulli r4, r4, 95
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v4, v2
; P9LE-NEXT:    vmrglw v2, v3, v2
; P9LE-NEXT:    blr
;
; P9BE-LABEL: fold_urem_vec_1:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    lis r4, 16727
; P9BE-NEXT:    lis r5, 8456
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 2287
; P9BE-NEXT:    ori r5, r5, 16913
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    srwi r4, r4, 8
; P9BE-NEXT:    mulli r4, r4, 1003
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, 21399
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    ori r4, r4, 33437
; P9BE-NEXT:    mtvsrd v3, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    srwi r4, r4, 5
; P9BE-NEXT:    mulli r4, r4, 98
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    vmrghh v3, v4, v3
; P9BE-NEXT:    clrlwi r4, r3, 16
; P9BE-NEXT:    rlwinm r3, r3, 30, 18, 31
; P9BE-NEXT:    mulhwu r3, r3, r5
; P9BE-NEXT:    srwi r3, r3, 2
; P9BE-NEXT:    mulli r3, r3, 124
; P9BE-NEXT:    sub r3, r4, r3
; P9BE-NEXT:    lis r4, 22765
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    ori r4, r4, 8969
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    sub r5, r3, r4
; P9BE-NEXT:    srwi r5, r5, 1
; P9BE-NEXT:    add r4, r5, r4
; P9BE-NEXT:    srwi r4, r4, 6
; P9BE-NEXT:    mulli r4, r4, 95
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v2, r3
; P9BE-NEXT:    vmrghh v2, v2, v4
; P9BE-NEXT:    vmrghw v2, v2, v3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: fold_urem_vec_1:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r3, 22765
; P8LE-NEXT:    lis r7, 21399
; P8LE-NEXT:    lis r9, 16727
; P8LE-NEXT:    lis r10, 8456
; P8LE-NEXT:    ori r3, r3, 8969
; P8LE-NEXT:    ori r7, r7, 33437
; P8LE-NEXT:    ori r9, r9, 2287
; P8LE-NEXT:    ori r10, r10, 16913
; P8LE-NEXT:    mffprd r4, f0
; P8LE-NEXT:    clrldi r6, r4, 48
; P8LE-NEXT:    rldicl r5, r4, 32, 48
; P8LE-NEXT:    clrlwi r6, r6, 16
; P8LE-NEXT:    rldicl r8, r4, 16, 48
; P8LE-NEXT:    clrlwi r5, r5, 16
; P8LE-NEXT:    mulhwu r3, r6, r3
; P8LE-NEXT:    rldicl r4, r4, 48, 48
; P8LE-NEXT:    clrlwi r8, r8, 16
; P8LE-NEXT:    rlwinm r11, r4, 30, 18, 31
; P8LE-NEXT:    mulhwu r7, r5, r7
; P8LE-NEXT:    clrlwi r4, r4, 16
; P8LE-NEXT:    mulhwu r9, r8, r9
; P8LE-NEXT:    mulhwu r10, r11, r10
; P8LE-NEXT:    sub r11, r6, r3
; P8LE-NEXT:    srwi r11, r11, 1
; P8LE-NEXT:    srwi r7, r7, 5
; P8LE-NEXT:    add r3, r11, r3
; P8LE-NEXT:    srwi r9, r9, 8
; P8LE-NEXT:    srwi r10, r10, 2
; P8LE-NEXT:    srwi r3, r3, 6
; P8LE-NEXT:    mulli r7, r7, 98
; P8LE-NEXT:    mulli r9, r9, 1003
; P8LE-NEXT:    mulli r3, r3, 95
; P8LE-NEXT:    mulli r10, r10, 124
; P8LE-NEXT:    sub r5, r5, r7
; P8LE-NEXT:    sub r7, r8, r9
; P8LE-NEXT:    sub r3, r6, r3
; P8LE-NEXT:    mtvsrd v2, r5
; P8LE-NEXT:    sub r4, r4, r10
; P8LE-NEXT:    mtvsrd v3, r7
; P8LE-NEXT:    mtvsrd v4, r3
; P8LE-NEXT:    mtvsrd v5, r4
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    vmrghh v3, v5, v4
; P8LE-NEXT:    vmrglw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: fold_urem_vec_1:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r4, v2
; P8BE-NEXT:    lis r3, 22765
; P8BE-NEXT:    lis r7, 16727
; P8BE-NEXT:    lis r9, 21399
; P8BE-NEXT:    lis r10, 8456
; P8BE-NEXT:    ori r3, r3, 8969
; P8BE-NEXT:    ori r7, r7, 2287
; P8BE-NEXT:    ori r9, r9, 33437
; P8BE-NEXT:    ori r10, r10, 16913
; P8BE-NEXT:    rldicl r6, r4, 16, 48
; P8BE-NEXT:    clrldi r5, r4, 48
; P8BE-NEXT:    clrlwi r6, r6, 16
; P8BE-NEXT:    rldicl r8, r4, 48, 48
; P8BE-NEXT:    clrlwi r5, r5, 16
; P8BE-NEXT:    mulhwu r3, r6, r3
; P8BE-NEXT:    rldicl r4, r4, 32, 48
; P8BE-NEXT:    clrlwi r8, r8, 16
; P8BE-NEXT:    mulhwu r7, r5, r7
; P8BE-NEXT:    rlwinm r11, r4, 30, 18, 31
; P8BE-NEXT:    clrlwi r4, r4, 16
; P8BE-NEXT:    mulhwu r9, r8, r9
; P8BE-NEXT:    mulhwu r10, r11, r10
; P8BE-NEXT:    sub r11, r6, r3
; P8BE-NEXT:    srwi r11, r11, 1
; P8BE-NEXT:    srwi r7, r7, 8
; P8BE-NEXT:    add r3, r11, r3
; P8BE-NEXT:    srwi r9, r9, 5
; P8BE-NEXT:    srwi r10, r10, 2
; P8BE-NEXT:    mulli r7, r7, 1003
; P8BE-NEXT:    srwi r3, r3, 6
; P8BE-NEXT:    mulli r9, r9, 98
; P8BE-NEXT:    mulli r3, r3, 95
; P8BE-NEXT:    mulli r10, r10, 124
; P8BE-NEXT:    sub r5, r5, r7
; P8BE-NEXT:    sub r7, r8, r9
; P8BE-NEXT:    sldi r5, r5, 48
; P8BE-NEXT:    sub r3, r6, r3
; P8BE-NEXT:    sub r4, r4, r10
; P8BE-NEXT:    mtvsrd v2, r5
; P8BE-NEXT:    sldi r5, r7, 48
; P8BE-NEXT:    sldi r3, r3, 48
; P8BE-NEXT:    sldi r4, r4, 48
; P8BE-NEXT:    mtvsrd v3, r5
; P8BE-NEXT:    mtvsrd v4, r3
; P8BE-NEXT:    mtvsrd v5, r4
; P8BE-NEXT:    vmrghh v2, v3, v2
; P8BE-NEXT:    vmrghh v3, v4, v5
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
  ret <4 x i16> %1
}

; Same operation with the divisor 95 in every lane. The expected output
; materializes one multiplier (lis 22765 / ori 8969) and reuses it for all four
; lanes.
define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
; P9LE-LABEL: fold_urem_vec_2:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    lis r4, 22765
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 8969
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r5, r3, r4
; P9LE-NEXT:    sub r6, r3, r5
; P9LE-NEXT:    srwi r6, r6, 1
; P9LE-NEXT:    add r5, r6, r5
; P9LE-NEXT:    srwi r5, r5, 6
; P9LE-NEXT:    mulli r5, r5, 95
; P9LE-NEXT:    sub r3, r3, r5
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r5, r3, r4
; P9LE-NEXT:    sub r6, r3, r5
; P9LE-NEXT:    srwi r6, r6, 1
; P9LE-NEXT:    add r5, r6, r5
; P9LE-NEXT:    srwi r5, r5, 6
; P9LE-NEXT:    mulli r5, r5, 95
; P9LE-NEXT:    sub r3, r3, r5
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r5, r3, r4
; P9LE-NEXT:    sub r6, r3, r5
; P9LE-NEXT:    srwi r6, r6, 1
; P9LE-NEXT:    add r5, r6, r5
; P9LE-NEXT:    srwi r5, r5, 6
; P9LE-NEXT:    mulli r5, r5, 95
; P9LE-NEXT:    sub r3, r3, r5
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    sub r5, r3, r4
; P9LE-NEXT:    srwi r5, r5, 1
; P9LE-NEXT:    add r4, r5, r4
; P9LE-NEXT:    srwi r4, r4, 6
; P9LE-NEXT:    mulli r4, r4, 95
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    vmrglw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: fold_urem_vec_2:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    lis r4, 22765
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 8969
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r5, r3, r4
; P9BE-NEXT:    sub r6, r3, r5
; P9BE-NEXT:    srwi r6, r6, 1
; P9BE-NEXT:    add r5, r6, r5
; P9BE-NEXT:    srwi r5, r5, 6
; P9BE-NEXT:    mulli r5, r5, 95
; P9BE-NEXT:    sub r3, r3, r5
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v3, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r5, r3, r4
; P9BE-NEXT:    sub r6, r3, r5
; P9BE-NEXT:    srwi r6, r6, 1
; P9BE-NEXT:    add r5, r6, r5
; P9BE-NEXT:    srwi r5, r5, 6
; P9BE-NEXT:    mulli r5, r5, 95
; P9BE-NEXT:    sub r3, r3, r5
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    vmrghh v3, v4, v3
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r5, r3, r4
; P9BE-NEXT:    sub r6, r3, r5
; P9BE-NEXT:    srwi r6, r6, 1
; P9BE-NEXT:    add r5, r6, r5
; P9BE-NEXT:    srwi r5, r5, 6
; P9BE-NEXT:    mulli r5, r5, 95
; P9BE-NEXT:    sub r3, r3, r5
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    sub r5, r3, r4
; P9BE-NEXT:    srwi r5, r5, 1
; P9BE-NEXT:    add r4, r5, r4
; P9BE-NEXT:    srwi r4, r4, 6
; P9BE-NEXT:    mulli r4, r4, 95
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v2, r3
; P9BE-NEXT:    vmrghh v2, v2, v4
; P9BE-NEXT:    vmrghw v2, v2, v3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: fold_urem_vec_2:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r3, 22765
; P8LE-NEXT:    ori r3, r3, 8969
; P8LE-NEXT:    mffprd r4, f0
; P8LE-NEXT:    clrldi r5, r4, 48
; P8LE-NEXT:    rldicl r6, r4, 48, 48
; P8LE-NEXT:    clrlwi r5, r5, 16
; P8LE-NEXT:    rldicl r7, r4, 32, 48
; P8LE-NEXT:    clrlwi r6, r6, 16
; P8LE-NEXT:    mulhwu r8, r5, r3
; P8LE-NEXT:    rldicl r4, r4, 16, 48
; P8LE-NEXT:    clrlwi r7, r7, 16
; P8LE-NEXT:    mulhwu r9, r6, r3
; P8LE-NEXT:    clrlwi r4, r4, 16
; P8LE-NEXT:    mulhwu r10, r7, r3
; P8LE-NEXT:    mulhwu r3, r4, r3
; P8LE-NEXT:    sub r11, r5, r8
; P8LE-NEXT:    sub r12, r6, r9
; P8LE-NEXT:    srwi r11, r11, 1
; P8LE-NEXT:    add r8, r11, r8
; P8LE-NEXT:    sub r11, r7, r10
; P8LE-NEXT:    srwi r12, r12, 1
; P8LE-NEXT:    add r9, r12, r9
; P8LE-NEXT:    sub r12, r4, r3
; P8LE-NEXT:    srwi r11, r11, 1
; P8LE-NEXT:    srwi r8, r8, 6
; P8LE-NEXT:    add r10, r11, r10
; P8LE-NEXT:    srwi r11, r12, 1
; P8LE-NEXT:    srwi r9, r9, 6
; P8LE-NEXT:    add r3, r11, r3
; P8LE-NEXT:    mulli r8, r8, 95
; P8LE-NEXT:    srwi r10, r10, 6
; P8LE-NEXT:    srwi r3, r3, 6
; P8LE-NEXT:    mulli r9, r9, 95
; P8LE-NEXT:    mulli r10, r10, 95
; P8LE-NEXT:    mulli r3, r3, 95
; P8LE-NEXT:    sub r5, r5, r8
; P8LE-NEXT:    sub r6, r6, r9
; P8LE-NEXT:    mtvsrd v2, r5
; P8LE-NEXT:    sub r5, r7, r10
; P8LE-NEXT:    sub r3, r4, r3
; P8LE-NEXT:    mtvsrd v3, r6
; P8LE-NEXT:    mtvsrd v4, r5
; P8LE-NEXT:    mtvsrd v5, r3
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    vmrghh v3, v5, v4
; P8LE-NEXT:    vmrglw v2, v3, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: fold_urem_vec_2:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r4, v2
; P8BE-NEXT:    lis r3, 22765
; P8BE-NEXT:    ori r3, r3, 8969
; P8BE-NEXT:    clrldi r5, r4, 48
; P8BE-NEXT:    rldicl r6, r4, 48, 48
; P8BE-NEXT:    clrlwi r5, r5, 16
; P8BE-NEXT:    rldicl r7, r4, 32, 48
; P8BE-NEXT:    clrlwi r6, r6, 16
; P8BE-NEXT:    mulhwu r8, r5, r3
; P8BE-NEXT:    rldicl r4, r4, 16, 48
; P8BE-NEXT:    clrlwi r7, r7, 16
; P8BE-NEXT:    mulhwu r9, r6, r3
; P8BE-NEXT:    clrlwi r4, r4, 16
; P8BE-NEXT:    mulhwu r10, r7, r3
; P8BE-NEXT:    mulhwu r3, r4, r3
; P8BE-NEXT:    sub r11, r5, r8
; P8BE-NEXT:    sub r12, r6, r9
; P8BE-NEXT:    srwi r11, r11, 1
; P8BE-NEXT:    add r8, r11, r8
; P8BE-NEXT:    sub r11, r7, r10
; P8BE-NEXT:    srwi r12, r12, 1
; P8BE-NEXT:    add r9, r12, r9
; P8BE-NEXT:    sub r12, r4, r3
; P8BE-NEXT:    srwi r11, r11, 1
; P8BE-NEXT:    srwi r8, r8, 6
; P8BE-NEXT:    add r10, r11, r10
; P8BE-NEXT:    srwi r11, r12, 1
; P8BE-NEXT:    srwi r9, r9, 6
; P8BE-NEXT:    add r3, r11, r3
; P8BE-NEXT:    srwi r10, r10, 6
; P8BE-NEXT:    srwi r3, r3, 6
; P8BE-NEXT:    mulli r8, r8, 95
; P8BE-NEXT:    mulli r9, r9, 95
; P8BE-NEXT:    mulli r10, r10, 95
; P8BE-NEXT:    mulli r3, r3, 95
; P8BE-NEXT:    sub r5, r5, r8
; P8BE-NEXT:    sub r6, r6, r9
; P8BE-NEXT:    sub r7, r7, r10
; P8BE-NEXT:    sub r3, r4, r3
; P8BE-NEXT:    sldi r5, r5, 48
; P8BE-NEXT:    sldi r6, r6, 48
; P8BE-NEXT:    sldi r4, r7, 48
; P8BE-NEXT:    mtvsrd v2, r5
; P8BE-NEXT:    sldi r3, r3, 48
; P8BE-NEXT:    mtvsrd v3, r6
; P8BE-NEXT:    mtvsrd v4, r4
; P8BE-NEXT:    mtvsrd v5, r3
; P8BE-NEXT:    vmrghh v2, v3, v2
; P8BE-NEXT:    vmrghh v3, v5, v4
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}


; Don't fold if we can combine urem with udiv.
; Both the remainder and the quotient of %x by 95 are computed and added. The
; expected output has one mulhwu per lane; the shifted result feeds both the
; mulli/sub remainder and the quotient vector.
define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
; P9LE-LABEL: combine_urem_udiv:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    lis r4, 22765
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 8969
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r5, r3, r4
; P9LE-NEXT:    sub r6, r3, r5
; P9LE-NEXT:    srwi r6, r6, 1
; P9LE-NEXT:    add r5, r6, r5
; P9LE-NEXT:    srwi r5, r5, 6
; P9LE-NEXT:    mulli r6, r5, 95
; P9LE-NEXT:    sub r3, r3, r6
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r6, r3, 16
; P9LE-NEXT:    mulhwu r7, r6, r4
; P9LE-NEXT:    sub r6, r6, r7
; P9LE-NEXT:    srwi r6, r6, 1
; P9LE-NEXT:    add r6, r6, r7
; P9LE-NEXT:    srwi r6, r6, 6
; P9LE-NEXT:    mulli r7, r6, 95
; P9LE-NEXT:    sub r3, r3, r7
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    clrlwi r7, r3, 16
; P9LE-NEXT:    mulhwu r8, r7, r4
; P9LE-NEXT:    sub r7, r7, r8
; P9LE-NEXT:    srwi r7, r7, 1
; P9LE-NEXT:    add r7, r7, r8
; P9LE-NEXT:    srwi r7, r7, 6
; P9LE-NEXT:    mulli r8, r7, 95
; P9LE-NEXT:    sub r3, r3, r8
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r8, r3, 16
; P9LE-NEXT:    mulhwu r4, r8, r4
; P9LE-NEXT:    sub r8, r8, r4
; P9LE-NEXT:    srwi r8, r8, 1
; P9LE-NEXT:    add r4, r8, r4
; P9LE-NEXT:    srwi r4, r4, 6
; P9LE-NEXT:    mulli r8, r4, 95
; P9LE-NEXT:    mtvsrd v5, r4
; P9LE-NEXT:    sub r3, r3, r8
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    mtvsrd v4, r6
; P9LE-NEXT:    vmrglw v2, v2, v3
; P9LE-NEXT:    mtvsrd v3, r5
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    mtvsrd v4, r7
; P9LE-NEXT:    vmrghh v4, v5, v4
; P9LE-NEXT:    vmrglw v3, v4, v3
; P9LE-NEXT:    vadduhm v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: combine_urem_udiv:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    lis r5, 22765
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r5, r5, 8969
; P9BE-NEXT:    clrlwi r4, r3, 16
; P9BE-NEXT:    mulhwu r6, r4, r5
; P9BE-NEXT:    sub r4, r4, r6
; P9BE-NEXT:    srwi r4, r4, 1
; P9BE-NEXT:    add r4, r4, r6
; P9BE-NEXT:    srwi r4, r4, 6
; P9BE-NEXT:    mulli r6, r4, 95
; P9BE-NEXT:    sub r3, r3, r6
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v3, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r6, r3, 16
; P9BE-NEXT:    mulhwu r7, r6, r5
; P9BE-NEXT:    sub r6, r6, r7
; P9BE-NEXT:    srwi r6, r6, 1
; P9BE-NEXT:    add r6, r6, r7
; P9BE-NEXT:    srwi r6, r6, 6
; P9BE-NEXT:    mulli r7, r6, 95
; P9BE-NEXT:    sub r3, r3, r7
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    vmrghh v3, v4, v3
; P9BE-NEXT:    clrlwi r7, r3, 16
; P9BE-NEXT:    mulhwu r8, r7, r5
; P9BE-NEXT:    sub r7, r7, r8
; P9BE-NEXT:    srwi r7, r7, 1
; P9BE-NEXT:    add r7, r7, r8
; P9BE-NEXT:    srwi r7, r7, 6
; P9BE-NEXT:    mulli r8, r7, 95
; P9BE-NEXT:    sub r3, r3, r8
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r5, r3, r5
; P9BE-NEXT:    sub r8, r3, r5
; P9BE-NEXT:    srwi r8, r8, 1
; P9BE-NEXT:    add r5, r8, r5
; P9BE-NEXT:    srwi r5, r5, 6
; P9BE-NEXT:    mulli r8, r5, 95
; P9BE-NEXT:    sub r3, r3, r8
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v2, r3
; P9BE-NEXT:    sldi r3, r4, 48
; P9BE-NEXT:    vmrghh v2, v2, v4
; P9BE-NEXT:    vmrghw v2, v2, v3
; P9BE-NEXT:    mtvsrd v3, r3
; P9BE-NEXT:    sldi r3, r6, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    sldi r3, r7, 48
; P9BE-NEXT:    vmrghh v3, v4, v3
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    sldi r3, r5, 48
; P9BE-NEXT:    mtvsrd v5, r3
; P9BE-NEXT:    vmrghh v4, v5, v4
; P9BE-NEXT:    vmrghw v3, v4, v3
; P9BE-NEXT:    vadduhm v2, v2, v3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: combine_urem_udiv:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r3, 22765
; P8LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; P8LE-NEXT:    ori r3, r3, 8969
; P8LE-NEXT:    mffprd r4, f0
; P8LE-NEXT:    clrldi r5, r4, 48
; P8LE-NEXT:    rldicl r6, r4, 48, 48
; P8LE-NEXT:    clrlwi r5, r5, 16
; P8LE-NEXT:    clrlwi r8, r6, 16
; P8LE-NEXT:    rldicl r7, r4, 32, 48
; P8LE-NEXT:    rldicl r4, r4, 16, 48
; P8LE-NEXT:    mulhwu r9, r5, r3
; P8LE-NEXT:    mulhwu r11, r8, r3
; P8LE-NEXT:    clrlwi r10, r7, 16
; P8LE-NEXT:    clrlwi r12, r4, 16
; P8LE-NEXT:    mulhwu r0, r10, r3
; P8LE-NEXT:    mulhwu r3, r12, r3
; P8LE-NEXT:    sub r30, r5, r9
; P8LE-NEXT:    sub r8, r8, r11
; P8LE-NEXT:    srwi r30, r30, 1
; P8LE-NEXT:    srwi r8, r8, 1
; P8LE-NEXT:    sub r10, r10, r0
; P8LE-NEXT:    add r9, r30, r9
; P8LE-NEXT:    add r8, r8, r11
; P8LE-NEXT:    sub r11, r12, r3
; P8LE-NEXT:    srwi r10, r10, 1
; P8LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; P8LE-NEXT:    srwi r9, r9, 6
; P8LE-NEXT:    srwi r11, r11, 1
; P8LE-NEXT:    srwi r8, r8, 6
; P8LE-NEXT:    add r10, r10, r0
; P8LE-NEXT:    mulli r12, r9, 95
; P8LE-NEXT:    add r3, r11, r3
; P8LE-NEXT:    mtvsrd v2, r9
; P8LE-NEXT:    srwi r10, r10, 6
; P8LE-NEXT:    mulli r9, r8, 95
; P8LE-NEXT:    srwi r3, r3, 6
; P8LE-NEXT:    mtvsrd v3, r8
; P8LE-NEXT:    mulli r8, r10, 95
; P8LE-NEXT:    mtvsrd v4, r10
; P8LE-NEXT:    mulli r10, r3, 95
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    sub r5, r5, r12
; P8LE-NEXT:    sub r6, r6, r9
; P8LE-NEXT:    mtvsrd v3, r5
; P8LE-NEXT:    mtvsrd v5, r6
; P8LE-NEXT:    sub r5, r7, r8
; P8LE-NEXT:    sub r4, r4, r10
; P8LE-NEXT:    mtvsrd v0, r5
; P8LE-NEXT:    mtvsrd v1, r4
; P8LE-NEXT:    vmrghh v3, v5, v3
; P8LE-NEXT:    mtvsrd v5, r3
; P8LE-NEXT:    vmrghh v0, v1, v0
; P8LE-NEXT:    vmrghh v4, v5, v4
; P8LE-NEXT:    vmrglw v3, v0, v3
; P8LE-NEXT:    vmrglw v2, v4, v2
; P8LE-NEXT:    vadduhm v2, v3, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: combine_urem_udiv:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r5, v2
; P8BE-NEXT:    lis r4, 22765
; P8BE-NEXT:    ori r4, r4, 8969
; P8BE-NEXT:    clrldi r3, r5, 48
; P8BE-NEXT:    rldicl r6, r5, 48, 48
; P8BE-NEXT:    clrlwi r8, r3, 16
; P8BE-NEXT:    rldicl r7, r5, 32, 48
; P8BE-NEXT:    clrlwi r9, r6, 16
; P8BE-NEXT:    rldicl r5, r5, 16, 48
; P8BE-NEXT:    mulhwu r10, r8, r4
; P8BE-NEXT:    clrlwi r11, r7, 16
; P8BE-NEXT:    mulhwu r12, r9, r4
; P8BE-NEXT:    clrlwi r5, r5, 16
; P8BE-NEXT:    mulhwu r0, r11, r4
; P8BE-NEXT:    mulhwu r4, r5, r4
; P8BE-NEXT:    sub r8, r8, r10
; P8BE-NEXT:    sub r9, r9, r12
; P8BE-NEXT:    srwi r8, r8, 1
; P8BE-NEXT:    add r8, r8, r10
; P8BE-NEXT:    sub r10, r11, r0
; P8BE-NEXT:    srwi r9, r9, 1
; P8BE-NEXT:    sub r11, r5, r4
; P8BE-NEXT:    add r9, r9, r12
; P8BE-NEXT:    srwi r8, r8, 6
; P8BE-NEXT:    srwi r11, r11, 1
; P8BE-NEXT:    srwi r10, r10, 1
; P8BE-NEXT:    srwi r9, r9, 6
; P8BE-NEXT:    mulli r12, r8, 95
; P8BE-NEXT:    add r4, r11, r4
; P8BE-NEXT:    add r10, r10, r0
; P8BE-NEXT:    mulli r11, r9, 95
; P8BE-NEXT:    srwi r4, r4, 6
; P8BE-NEXT:    srwi r10, r10, 6
; P8BE-NEXT:    sldi r9, r9, 48
; P8BE-NEXT:    sldi r8, r8, 48
; P8BE-NEXT:    mtvsrd v3, r9
; P8BE-NEXT:    mulli r9, r4, 95
; P8BE-NEXT:    mtvsrd v2, r8
; P8BE-NEXT:    mulli r8, r10, 95
; P8BE-NEXT:    sub r3, r3, r12
; P8BE-NEXT:    sub r6, r6, r11
; P8BE-NEXT:    sldi r3, r3, 48
; P8BE-NEXT:    vmrghh v2, v3, v2
; P8BE-NEXT:    sldi r6, r6, 48
; P8BE-NEXT:    sldi r10, r10, 48
; P8BE-NEXT:    mtvsrd v3, r3
; P8BE-NEXT:    sub r3, r5, r9
; P8BE-NEXT:    sub r7, r7, r8
; P8BE-NEXT:    mtvsrd v5, r6
; P8BE-NEXT:    sldi r3, r3, 48
; P8BE-NEXT:    sldi r5, r7, 48
; P8BE-NEXT:    mtvsrd v1, r3
; P8BE-NEXT:    sldi r3, r4, 48
; P8BE-NEXT:    mtvsrd v4, r10
; P8BE-NEXT:    mtvsrd v0, r5
; P8BE-NEXT:    vmrghh v3, v5, v3
; P8BE-NEXT:    mtvsrd v5, r3
; P8BE-NEXT:    vmrghh v0, v1, v0
; P8BE-NEXT:    vmrghh v4, v5, v4
; P8BE-NEXT:    vmrghw v3, v0, v3
; P8BE-NEXT:    vmrghw v2, v4, v2
; P8BE-NEXT:    vadduhm v2, v3, v2
; P8BE-NEXT:    blr
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}

; Don't fold for divisors that are a power of two.
; The lanes with divisors 64, 32 and 8 are expected to become plain masks
; (clrlwi by 26, 27 and 29); only the lane with divisor 95 uses the mulhwu
; sequence.
define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; P9LE-LABEL: dont_fold_urem_power_of_two:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    lis r4, 22765
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 8969
; P9LE-NEXT:    clrlwi r3, r3, 26
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 27
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    sub r5, r3, r4
; P9LE-NEXT:    srwi r5, r5, 1
; P9LE-NEXT:    add r4, r5, r4
; P9LE-NEXT:    srwi r4, r4, 6
; P9LE-NEXT:    mulli r4, r4, 95
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 29
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v4, v2
; P9LE-NEXT:    vmrglw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: dont_fold_urem_power_of_two:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    lis r4, 22765
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 8969
; P9BE-NEXT:    clrlwi r3, r3, 27
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v3, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 26
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    vmrghh v3, v4, v3
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    sub r5, r3, r4
; P9BE-NEXT:    srwi r5, r5, 1
; P9BE-NEXT:    add r4, r5, r4
; P9BE-NEXT:    srwi r4, r4, 6
; P9BE-NEXT:    mulli r4, r4, 95
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 29
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v2, r3
; P9BE-NEXT:    vmrghh v2, v2, v4
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: dont_fold_urem_power_of_two:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r3, 22765
; P8LE-NEXT:    ori r3, r3, 8969
; P8LE-NEXT:    mffprd r4, f0
; P8LE-NEXT:    rldicl r5, r4, 16, 48
; P8LE-NEXT:    rldicl r7, r4, 48, 48
; P8LE-NEXT:    clrlwi r5, r5, 16
; P8LE-NEXT:    mulhwu r3, r5, r3
; P8LE-NEXT:    sub r6, r5, r3
; P8LE-NEXT:    srwi r6, r6, 1
; P8LE-NEXT:    add r3, r6, r3
; P8LE-NEXT:    clrldi r6, r4, 48
; P8LE-NEXT:    srwi r3, r3, 6
; P8LE-NEXT:    clrlwi r6, r6, 26
; P8LE-NEXT:    mulli r3, r3, 95
; P8LE-NEXT:    rldicl r4, r4, 32, 48
; P8LE-NEXT:    mtvsrd v2, r6
; P8LE-NEXT:    clrlwi r6, r7, 27
; P8LE-NEXT:    clrlwi r4, r4, 29
; P8LE-NEXT:    mtvsrd v3, r6
; P8LE-NEXT:    mtvsrd v5, r4
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    sub r3, r5, r3
; P8LE-NEXT:    mtvsrd v4, r3
; P8LE-NEXT:    vmrghh v3, v4, v5
; P8LE-NEXT:    vmrglw v2, v3, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: dont_fold_urem_power_of_two:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r4, v2
; P8BE-NEXT:    lis r3, 22765
; P8BE-NEXT:    ori r3, r3, 8969
; P8BE-NEXT:    clrldi r5, r4, 48
; P8BE-NEXT:    rldicl r7, r4, 16, 48
; P8BE-NEXT:    clrlwi r5, r5, 16
; P8BE-NEXT:    clrlwi r7, r7, 26
; P8BE-NEXT:    mulhwu r3, r5, r3
; P8BE-NEXT:    sub r6, r5, r3
; P8BE-NEXT:    srwi r6, r6, 1
; P8BE-NEXT:    add r3, r6, r3
; P8BE-NEXT:    rldicl r6, r4, 32, 48
; P8BE-NEXT:    srwi r3, r3, 6
; P8BE-NEXT:    rldicl r4, r4, 48, 48
; P8BE-NEXT:    clrlwi r6, r6, 27
; P8BE-NEXT:    mulli r3, r3, 95
; P8BE-NEXT:    sldi r6, r6, 48
; P8BE-NEXT:    clrlwi r4, r4, 29
; P8BE-NEXT:    mtvsrd v2, r6
; P8BE-NEXT:    sldi r6, r7, 48
; P8BE-NEXT:    sldi r4, r4, 48
; P8BE-NEXT:    mtvsrd v3, r6
; P8BE-NEXT:    mtvsrd v5, r4
; P8BE-NEXT:    sub r3, r5, r3
; P8BE-NEXT:    vmrghh v2, v3, v2
; P8BE-NEXT:    sldi r3, r3, 48
; P8BE-NEXT:    mtvsrd v4, r3
; P8BE-NEXT:    vmrghh v3, v5, v4
; P8BE-NEXT:    vmrghw v2, v2, v3
; P8BE-NEXT:    blr
  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}

; Don't fold if the divisor is one.
; urem by <1, 654, 23, 5423>. The expected output builds the divisor-1 lane
; from a constant zero (li 0 / mtvsrd) and uses a mulhwu sequence for the other
; three lanes.
define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; P9LE-LABEL: dont_fold_urem_one:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    lis r4, -19946
; P9LE-NEXT:    lis r5, -14230
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 17097
; P9LE-NEXT:    ori r5, r5, 30865
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    srwi r4, r4, 4
; P9LE-NEXT:    mulli r4, r4, 23
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, 24749
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    ori r4, r4, 47143
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    clrlwi r3, r3, 16
; P9LE-NEXT:    mulhwu r4, r3, r4
; P9LE-NEXT:    srwi r4, r4, 11
; P9LE-NEXT:    mulli r4, r4, 5423
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    clrlwi r4, r3, 16
; P9LE-NEXT:    rlwinm r3, r3, 31, 17, 31
; P9LE-NEXT:    mulhwu r3, r3, r5
; P9LE-NEXT:    srwi r3, r3, 8
; P9LE-NEXT:    mulli r3, r3, 654
; P9LE-NEXT:    sub r3, r4, r3
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    vmrglw v2, v3, v2
; P9LE-NEXT:    blr
;
; P9BE-LABEL: dont_fold_urem_one:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    lis r4, 24749
; P9BE-NEXT:    lis r5, -14230
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 47143
; P9BE-NEXT:    ori r5, r5, 30865
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    srwi r4, r4, 11
; P9BE-NEXT:    mulli r4, r4, 5423
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, -19946
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    ori r4, r4, 17097
; P9BE-NEXT:    mtvsrd v3, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    clrlwi r3, r3, 16
; P9BE-NEXT:    mulhwu r4, r3, r4
; P9BE-NEXT:    srwi r4, r4, 4
; P9BE-NEXT:    mulli r4, r4, 23
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    vmrghh v3, v4, v3
; P9BE-NEXT:    clrlwi r4, r3, 16
; P9BE-NEXT:    rlwinm r3, r3, 31, 17, 31
; P9BE-NEXT:    mulhwu r3, r3, r5
; P9BE-NEXT:    srwi r3, r3, 8
; P9BE-NEXT:    mulli r3, r3, 654
; P9BE-NEXT:    sub r3, r4, r3
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v2, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    sldi r3, r3, 48
; P9BE-NEXT:    mtvsrd v4, r3
; P9BE-NEXT:    vmrghh v2, v4, v2
; P9BE-NEXT:    vmrghw v2, v2, v3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: dont_fold_urem_one:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r3, -14230
; P8LE-NEXT:    lis r7, -19946
; P8LE-NEXT:    lis r9, 24749
; P8LE-NEXT:    ori r3, r3, 30865
; P8LE-NEXT:    ori r7, r7, 17097
; P8LE-NEXT:    mffprd r4, f0
; P8LE-NEXT:    rldicl r5, r4, 48, 48
; P8LE-NEXT:    rldicl r6, r4, 32, 48
; P8LE-NEXT:    rldicl r4, r4, 16, 48
; P8LE-NEXT:    rlwinm r8, r5, 31, 17, 31
; P8LE-NEXT:    clrlwi r6, r6, 16
; P8LE-NEXT:    clrlwi r5, r5, 16
; P8LE-NEXT:    mulhwu r3, r8, r3
; P8LE-NEXT:    ori r8, r9, 47143
; P8LE-NEXT:    clrlwi r4, r4, 16
; P8LE-NEXT:    li r9, 0
; P8LE-NEXT:    mulhwu r7, r6, r7
; P8LE-NEXT:    mulhwu r8, r4, r8
; P8LE-NEXT:    mtvsrd v2, r9
; P8LE-NEXT:    srwi r3, r3, 8
; P8LE-NEXT:    srwi r7, r7, 4
; P8LE-NEXT:    mulli r3, r3, 654
; P8LE-NEXT:    srwi r8, r8, 11
; P8LE-NEXT:    mulli r7, r7, 23
; P8LE-NEXT:    mulli r8, r8, 5423
; P8LE-NEXT:    sub r3, r5, r3
; P8LE-NEXT:    sub r5, r6, r7
; P8LE-NEXT:    mtvsrd v3, r3
; P8LE-NEXT:    sub r3, r4, r8
; P8LE-NEXT:    mtvsrd v4, r5
; P8LE-NEXT:    mtvsrd v5, r3
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    vmrghh v3, v5, v4
; P8LE-NEXT:    vmrglw v2, v3, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: dont_fold_urem_one:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r4, v2
; P8BE-NEXT:    lis r3, 24749
; P8BE-NEXT:    lis r7, -19946
; P8BE-NEXT:    lis r8, -14230
; P8BE-NEXT:    ori r3, r3, 47143
; P8BE-NEXT:    ori r7, r7, 17097
; P8BE-NEXT:    ori r8, r8, 30865
; P8BE-NEXT:    clrldi r5, r4, 48
; P8BE-NEXT:    rldicl r6, r4, 48, 48
; P8BE-NEXT:    rldicl r4, r4, 32, 48
; P8BE-NEXT:    clrlwi r5, r5, 16
; P8BE-NEXT:    clrlwi r6, r6, 16
; P8BE-NEXT:    mulhwu r3, r5, r3
; P8BE-NEXT:    rlwinm r9, r4, 31, 17, 31
; P8BE-NEXT:    clrlwi r4, r4, 16
; P8BE-NEXT:    mulhwu r7, r6, r7
; P8BE-NEXT:    mulhwu r8, r9, r8
; P8BE-NEXT:    li r9, 0
; P8BE-NEXT:    srwi r3, r3, 11
; P8BE-NEXT:    srwi r7, r7, 4
; P8BE-NEXT:    mulli r3, r3, 5423
; P8BE-NEXT:    srwi r8, r8, 8
; P8BE-NEXT:    mulli r7, r7, 23
; P8BE-NEXT:    mulli r8, r8, 654
; P8BE-NEXT:    sub r3, r5, r3
; P8BE-NEXT:    sldi r5, r9, 48
; P8BE-NEXT:    mtvsrd v2, r5
; P8BE-NEXT:    sub r5, r6, r7
; P8BE-NEXT:    sldi r3, r3, 48
; P8BE-NEXT:    sub r4, r4, r8
; P8BE-NEXT:    sldi r5, r5, 48
; P8BE-NEXT:    mtvsrd v3, r3
; P8BE-NEXT:    sldi r3, r4, 48
; P8BE-NEXT:    mtvsrd v4, r5
; P8BE-NEXT:    mtvsrd v5, r3
; P8BE-NEXT:    vmrghh v3, v4, v3
; P8BE-NEXT:    vmrghh v2, v2, v5
; P8BE-NEXT:    vmrghw v2, v2, v3
; P8BE-NEXT:    blr
  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold if the divisor is 2^16.
; NOTE(review): the function name says "smax" (32767 for i16) while the comment
; and the constant say 2^16 = 65536, which is not representable in i16.
; Presumably the constant is truncated to 0, which would make this a urem by
; zero and explain why the expected output for all configurations is a bare
; blr. Confirm the intended divisor before relying on this test.
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; CHECK-LABEL: dont_fold_urem_i16_smax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold i64 urem.
; <4 x i64> urem by the constant vector <1, 654, 23, 5423>.
;
; For every RUN configuration the expected code moves the lanes into GPRs and
; handles them one at a time: each of the divisors 654, 23 and 5423 gets a
; 64-bit magic constant, a mulhdu, a mulli by the divisor and a final sub.
; The lane whose divisor is 1 is never computed; a constant 0 is inserted for
; it instead.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script rather than editing by hand.
define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
; P9LE-LABEL: dont_fold_urem_i64:
; P9LE:       # %bb.0:
; P9LE-NEXT:    lis r4, 25644
; P9LE-NEXT:    mfvsrld r3, v3
; P9LE-NEXT:    ori r4, r4, 34192
; P9LE-NEXT:    sldi r4, r4, 32
; P9LE-NEXT:    oris r4, r4, 45590
; P9LE-NEXT:    ori r4, r4, 17097
; P9LE-NEXT:    mulhdu r4, r3, r4
; P9LE-NEXT:    sub r5, r3, r4
; P9LE-NEXT:    rldicl r5, r5, 63, 1
; P9LE-NEXT:    add r4, r5, r4
; P9LE-NEXT:    lis r5, -16037
; P9LE-NEXT:    rldicl r4, r4, 60, 4
; P9LE-NEXT:    ori r5, r5, 28749
; P9LE-NEXT:    mulli r4, r4, 23
; P9LE-NEXT:    sldi r5, r5, 32
; P9LE-NEXT:    oris r5, r5, 52170
; P9LE-NEXT:    ori r5, r5, 12109
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mfvsrd r4, v3
; P9LE-NEXT:    mulhdu r5, r4, r5
; P9LE-NEXT:    rldicl r5, r5, 52, 12
; P9LE-NEXT:    mulli r5, r5, 5423
; P9LE-NEXT:    sub r4, r4, r5
; P9LE-NEXT:    lis r5, 25653
; P9LE-NEXT:    ori r5, r5, 15432
; P9LE-NEXT:    mtvsrdd v3, r4, r3
; P9LE-NEXT:    mfvsrd r3, v2
; P9LE-NEXT:    sldi r5, r5, 32
; P9LE-NEXT:    rldicl r4, r3, 63, 1
; P9LE-NEXT:    oris r5, r5, 1603
; P9LE-NEXT:    ori r5, r5, 21445
; P9LE-NEXT:    mulhdu r4, r4, r5
; P9LE-NEXT:    rldicl r4, r4, 57, 7
; P9LE-NEXT:    mulli r4, r4, 654
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    li r4, 0
; P9LE-NEXT:    mtvsrdd v2, r3, r4
; P9LE-NEXT:    blr
;
; P9BE-LABEL: dont_fold_urem_i64:
; P9BE:       # %bb.0:
; P9BE-NEXT:    lis r4, 25644
; P9BE-NEXT:    mfvsrd r3, v3
; P9BE-NEXT:    ori r4, r4, 34192
; P9BE-NEXT:    sldi r4, r4, 32
; P9BE-NEXT:    oris r4, r4, 45590
; P9BE-NEXT:    ori r4, r4, 17097
; P9BE-NEXT:    mulhdu r4, r3, r4
; P9BE-NEXT:    sub r5, r3, r4
; P9BE-NEXT:    rldicl r5, r5, 63, 1
; P9BE-NEXT:    add r4, r5, r4
; P9BE-NEXT:    lis r5, -16037
; P9BE-NEXT:    rldicl r4, r4, 60, 4
; P9BE-NEXT:    ori r5, r5, 28749
; P9BE-NEXT:    mulli r4, r4, 23
; P9BE-NEXT:    sldi r5, r5, 32
; P9BE-NEXT:    oris r5, r5, 52170
; P9BE-NEXT:    ori r5, r5, 12109
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mfvsrld r4, v3
; P9BE-NEXT:    mulhdu r5, r4, r5
; P9BE-NEXT:    rldicl r5, r5, 52, 12
; P9BE-NEXT:    mulli r5, r5, 5423
; P9BE-NEXT:    sub r4, r4, r5
; P9BE-NEXT:    lis r5, 25653
; P9BE-NEXT:    ori r5, r5, 15432
; P9BE-NEXT:    mtvsrdd v3, r3, r4
; P9BE-NEXT:    mfvsrld r3, v2
; P9BE-NEXT:    sldi r5, r5, 32
; P9BE-NEXT:    rldicl r4, r3, 63, 1
; P9BE-NEXT:    oris r5, r5, 1603
; P9BE-NEXT:    ori r5, r5, 21445
; P9BE-NEXT:    mulhdu r4, r4, r5
; P9BE-NEXT:    rldicl r4, r4, 57, 7
; P9BE-NEXT:    mulli r4, r4, 654
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtvsrdd v2, 0, r3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: dont_fold_urem_i64:
; P8LE:       # %bb.0:
; P8LE-NEXT:    lis r3, 25644
; P8LE-NEXT:    xxswapd vs0, v3
; P8LE-NEXT:    lis r4, -16037
; P8LE-NEXT:    lis r5, 25653
; P8LE-NEXT:    mfvsrd r6, v2
; P8LE-NEXT:    ori r3, r3, 34192
; P8LE-NEXT:    ori r4, r4, 28749
; P8LE-NEXT:    ori r5, r5, 15432
; P8LE-NEXT:    mfvsrd r8, v3
; P8LE-NEXT:    sldi r3, r3, 32
; P8LE-NEXT:    sldi r4, r4, 32
; P8LE-NEXT:    oris r3, r3, 45590
; P8LE-NEXT:    mffprd r7, f0
; P8LE-NEXT:    sldi r5, r5, 32
; P8LE-NEXT:    oris r4, r4, 52170
; P8LE-NEXT:    ori r3, r3, 17097
; P8LE-NEXT:    oris r5, r5, 1603
; P8LE-NEXT:    ori r4, r4, 12109
; P8LE-NEXT:    mulhdu r3, r7, r3
; P8LE-NEXT:    rldicl r9, r6, 63, 1
; P8LE-NEXT:    ori r5, r5, 21445
; P8LE-NEXT:    mulhdu r4, r8, r4
; P8LE-NEXT:    mulhdu r5, r9, r5
; P8LE-NEXT:    sub r9, r7, r3
; P8LE-NEXT:    rldicl r9, r9, 63, 1
; P8LE-NEXT:    rldicl r4, r4, 52, 12
; P8LE-NEXT:    add r3, r9, r3
; P8LE-NEXT:    rldicl r5, r5, 57, 7
; P8LE-NEXT:    mulli r4, r4, 5423
; P8LE-NEXT:    rldicl r3, r3, 60, 4
; P8LE-NEXT:    mulli r5, r5, 654
; P8LE-NEXT:    mulli r3, r3, 23
; P8LE-NEXT:    sub r4, r8, r4
; P8LE-NEXT:    sub r5, r6, r5
; P8LE-NEXT:    mtfprd f0, r4
; P8LE-NEXT:    sub r3, r7, r3
; P8LE-NEXT:    li r4, 0
; P8LE-NEXT:    mtfprd f1, r5
; P8LE-NEXT:    mtfprd f2, r3
; P8LE-NEXT:    mtfprd f3, r4
; P8LE-NEXT:    xxmrghd v3, vs0, vs2
; P8LE-NEXT:    xxmrghd v2, vs1, vs3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: dont_fold_urem_i64:
; P8BE:       # %bb.0:
; P8BE-NEXT:    lis r3, 25644
; P8BE-NEXT:    lis r4, -16037
; P8BE-NEXT:    xxswapd vs0, v3
; P8BE-NEXT:    xxswapd vs1, v2
; P8BE-NEXT:    lis r5, 25653
; P8BE-NEXT:    ori r3, r3, 34192
; P8BE-NEXT:    ori r4, r4, 28749
; P8BE-NEXT:    mfvsrd r6, v3
; P8BE-NEXT:    ori r5, r5, 15432
; P8BE-NEXT:    sldi r3, r3, 32
; P8BE-NEXT:    sldi r4, r4, 32
; P8BE-NEXT:    oris r3, r3, 45590
; P8BE-NEXT:    sldi r5, r5, 32
; P8BE-NEXT:    mffprd r7, f0
; P8BE-NEXT:    oris r4, r4, 52170
; P8BE-NEXT:    ori r3, r3, 17097
; P8BE-NEXT:    mffprd r8, f1
; P8BE-NEXT:    oris r5, r5, 1603
; P8BE-NEXT:    ori r4, r4, 12109
; P8BE-NEXT:    mulhdu r3, r6, r3
; P8BE-NEXT:    ori r5, r5, 21445
; P8BE-NEXT:    mulhdu r4, r7, r4
; P8BE-NEXT:    rldicl r9, r8, 63, 1
; P8BE-NEXT:    mulhdu r5, r9, r5
; P8BE-NEXT:    sub r9, r6, r3
; P8BE-NEXT:    rldicl r9, r9, 63, 1
; P8BE-NEXT:    rldicl r4, r4, 52, 12
; P8BE-NEXT:    add r3, r9, r3
; P8BE-NEXT:    mulli r4, r4, 5423
; P8BE-NEXT:    rldicl r5, r5, 57, 7
; P8BE-NEXT:    rldicl r3, r3, 60, 4
; P8BE-NEXT:    mulli r5, r5, 654
; P8BE-NEXT:    mulli r3, r3, 23
; P8BE-NEXT:    sub r4, r7, r4
; P8BE-NEXT:    mtfprd f0, r4
; P8BE-NEXT:    sub r4, r8, r5
; P8BE-NEXT:    sub r3, r6, r3
; P8BE-NEXT:    mtfprd f1, r4
; P8BE-NEXT:    li r4, 0
; P8BE-NEXT:    mtfprd f2, r3
; P8BE-NEXT:    mtfprd f3, r4
; P8BE-NEXT:    xxmrghd v3, vs2, vs0
; P8BE-NEXT:    xxmrghd v2, vs3, vs1
; P8BE-NEXT:    blr
  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}