; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; This file exercises the llvm.ppc.mma.* intrinsics with -mcpu=pwr10. Each
; function is compiled twice by the RUN lines above: once for the
; little-endian triple (default assertion prefix) and once for the big-endian
; triple (the -BE assertion prefix).

; assemble_acc
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_acc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs3, v3, v3
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: ass_acc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    xxlor vs0, v2, v2
; CHECK-BE-NEXT:    xxlor vs1, v3, v3
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs3, v3, v3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  store <512 x i1> %0, <512 x i1>* %ptr, align 64
  ret void
}

; assemble_pair
declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_pair:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    stxv v2, 16(r3)
; CHECK-NEXT:    stxv v3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: ass_pair:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    stxv v2, 16(r3)
; CHECK-BE-NEXT:    stxv v2, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
  store <256 x i1> %0, <256 x i1>* %ptr, align 32
  ret void
}

; xxmtacc
declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmtacc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs3, v3, v3
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxmtacc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    xxlor vs0, v2, v2
; CHECK-BE-NEXT:    xxlor vs1, v3, v3
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs3, v3, v3
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
; generated from the call to xxmtacc then one xxmfacc is generated for the store
; NOTE(review): the autogenerated assertions in this function show a single
; xxmtacc and no xxmfacc, so the description above looks stale - confirm.
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
  store <512 x i1> %1, <512 x i1>* %ptr, align 64
  ret void
}

; xxmfacc
declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmfacc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs3, v3, v3
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxmfacc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    xxlor vs0, v2, v2
; CHECK-BE-NEXT:    xxlor vs1, v3, v3
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs3, v3, v3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
; generated from the call to xxmfacc then one xxmfacc is generated for the store
; NOTE(review): the autogenerated assertions in this function show neither
; xxmtacc nor xxmfacc, so the description above looks stale - confirm.
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
  store <512 x i1> %1, <512 x i1>* %ptr, align 64
  ret void
}

; xxsetaccz
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
define void @int_xxsetaccz(<512 x i1>* %ptr) {
; CHECK-LABEL: int_xxsetaccz:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxsetaccz:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, <512 x i1>* %ptr, align 64
  ret void
}

; disassemble_acc
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) {
; CHECK-LABEL: disass_acc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    stxv vs2, 0(r4)
; CHECK-NEXT:    stxv vs1, 0(r5)
; CHECK-NEXT:    stxv vs0, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: disass_acc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs1, 0(r4)
; CHECK-BE-NEXT:    stxv vs2, 0(r5)
; CHECK-BE-NEXT:    stxv vs3, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
  store <16 x i8> %2, <16 x i8>* %ptr1, align 16
  store <16 x i8> %3, <16 x i8>* %ptr2, align 16
  store <16 x i8> %4, <16 x i8>* %ptr3, align 16
  store <16 x i8> %5, <16 x i8>* %ptr4, align 16
  ret void
}

; disassemble_pair
declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
; CHECK-LABEL: disass_pair:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 0(r3)
; CHECK-NEXT:    lxv vs0, 16(r3)
; CHECK-NEXT:    stxv vs1, 0(r4)
; CHECK-NEXT:    stxv vs0, 0(r5)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: disass_pair:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r4)
; CHECK-BE-NEXT:    stxv vs1, 0(r5)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
  store <16 x i8> %2, <16 x i8>* %ptr2, align 16
  store <16 x i8> %3, <16 x i8>* %ptr3, align 16
  ret void
}

declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-LABEL: testBranch:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmplwi r7, 0
; CHECK-NEXT:    beq cr0, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    b .LBB7_3
; CHECK-NEXT:  .LBB7_2: # %if.else
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvi4ger8pp acc0, v2, v2
; CHECK-NEXT:  .LBB7_3: # %if.end
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testBranch:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    cmplwi r7, 0
; CHECK-BE-NEXT:    beq cr0, .LBB7_2
; CHECK-BE-NEXT:  # %bb.1: # %if.then
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    b .LBB7_3
; CHECK-BE-NEXT:  .LBB7_2: # %if.else
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT:  .LBB7_3: # %if.end
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %tobool = icmp eq i32 %val, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  br label %if.end

if.else:
  %1 = load <512 x i1>, <512 x i1>* %ptr, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  br label %if.end

if.end:
  %vq1.0 = phi <512 x i1> [ %0, %if.then ], [ %2, %if.else ]
  store <512 x i1> %vq1.0, <512 x i1>* %ptr, align 64
  ret void
}

; The following test cases check that the xxsetaccz instruction is correctly rematerialized
declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)

define void @testcse(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xvf32gerpp acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xvf32gerpp acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %2, <512 x i1>* %4, align 64
  store <512 x i1> %3, <512 x i1>* %5, align 64
  ret void
}

define void @testcse2(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs4, 48(r3)
; CHECK-NEXT:    stxv vs5, 32(r3)
; CHECK-NEXT:    stxv vs6, 16(r3)
; CHECK-NEXT:    stxv vs7, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs5, 16(r3)
; CHECK-BE-NEXT:    stxv vs4, 0(r3)
; CHECK-BE-NEXT:    stxv vs7, 48(r3)
; CHECK-BE-NEXT:    stxv vs6, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %2, <512 x i1>* %4, align 64
  store <512 x i1> %3, <512 x i1>* %5, align 64
  ret void
}

define void @testcse3(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs4, 48(r3)
; CHECK-NEXT:    stxv vs5, 32(r3)
; CHECK-NEXT:    stxv vs6, 16(r3)
; CHECK-NEXT:    stxv vs7, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs5, 16(r3)
; CHECK-BE-NEXT:    stxv vs4, 0(r3)
; CHECK-BE-NEXT:    stxv vs7, 48(r3)
; CHECK-BE-NEXT:    stxv vs6, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %1, <512 x i1>* %3, align 64
  store <512 x i1> %2, <512 x i1>* %4, align 64
  ret void
}

define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-LABEL: testcse4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpwi r4, 1
; CHECK-NEXT:    bltlr cr0
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    clrldi r4, r4, 32
; CHECK-NEXT:    li r6, 0
; CHECK-NEXT:    mtctr r4
; CHECK-NEXT:    li r4, 0
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB11_2: # %for.body
; CHECK-NEXT:    #
; CHECK-NEXT:    rldic r7, r6, 4, 28
; CHECK-NEXT:    addi r6, r6, 6
; CHECK-NEXT:    xxsetaccz acc2
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    lxvx vs0, r5, r7
; CHECK-NEXT:    add r7, r5, r7
; CHECK-NEXT:    lxv vs1, 16(r7)
; CHECK-NEXT:    xvf32gerpp acc2, vs0, vs1
; CHECK-NEXT:    lxv vs0, 32(r7)
; CHECK-NEXT:    lxv vs1, 48(r7)
; CHECK-NEXT:    xxmfacc acc2
; CHECK-NEXT:    xvf32gerpn acc1, vs0, vs1
; CHECK-NEXT:    lxv vs12, 64(r7)
; CHECK-NEXT:    lxv vs13, 80(r7)
; CHECK-NEXT:    rldic r7, r4, 6, 26
; CHECK-NEXT:    addi r4, r4, 3
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
; CHECK-NEXT:    stxvx vs11, r3, r7
; CHECK-NEXT:    add r7, r3, r7
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs8, 48(r7)
; CHECK-NEXT:    stxv vs9, 32(r7)
; CHECK-NEXT:    stxv vs10, 16(r7)
; CHECK-NEXT:    stxv vs4, 112(r7)
; CHECK-NEXT:    stxv vs5, 96(r7)
; CHECK-NEXT:    stxv vs6, 80(r7)
; CHECK-NEXT:    stxv vs7, 64(r7)
; CHECK-NEXT:    stxv vs0, 176(r7)
; CHECK-NEXT:    stxv vs1, 160(r7)
; CHECK-NEXT:    stxv vs2, 144(r7)
; CHECK-NEXT:    stxv vs3, 128(r7)
; CHECK-NEXT:    bdnz .LBB11_2
; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    cmpwi r4, 1
; CHECK-BE-NEXT:    bltlr cr0
; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-BE-NEXT:    clrldi r4, r4, 32
; CHECK-BE-NEXT:    li r6, 0
; CHECK-BE-NEXT:    mtctr r4
; CHECK-BE-NEXT:    li r4, 0
; CHECK-BE-NEXT:    .p2align 4
; CHECK-BE-NEXT:  .LBB11_2: # %for.body
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    rldic r7, r6, 4, 28
; CHECK-BE-NEXT:    addi r6, r6, 6
; CHECK-BE-NEXT:    xxsetaccz acc2
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    lxvx vs0, r5, r7
; CHECK-BE-NEXT:    add r7, r5, r7
; CHECK-BE-NEXT:    lxv vs1, 16(r7)
; CHECK-BE-NEXT:    xvf32gerpp acc2, vs0, vs1
; CHECK-BE-NEXT:    lxv vs0, 32(r7)
; CHECK-BE-NEXT:    lxv vs1, 48(r7)
; CHECK-BE-NEXT:    xxmfacc acc2
; CHECK-BE-NEXT:    xvf32gerpn acc1, vs0, vs1
; CHECK-BE-NEXT:    lxv vs12, 64(r7)
; CHECK-BE-NEXT:    lxv vs13, 80(r7)
; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
; CHECK-BE-NEXT:    addi r4, r4, 3
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13
; CHECK-BE-NEXT:    stxvx vs8, r3, r7
; CHECK-BE-NEXT:    add r7, r3, r7
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs9, 16(r7)
; CHECK-BE-NEXT:    stxv vs11, 48(r7)
; CHECK-BE-NEXT:    stxv vs10, 32(r7)
; CHECK-BE-NEXT:    stxv vs5, 80(r7)
; CHECK-BE-NEXT:    stxv vs4, 64(r7)
; CHECK-BE-NEXT:    stxv vs7, 112(r7)
; CHECK-BE-NEXT:    stxv vs6, 96(r7)
; CHECK-BE-NEXT:    stxv vs1, 144(r7)
; CHECK-BE-NEXT:    stxv vs0, 128(r7)
; CHECK-BE-NEXT:    stxv vs3, 176(r7)
; CHECK-BE-NEXT:    stxv vs2, 160(r7)
; CHECK-BE-NEXT:    bdnz .LBB11_2
; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT:    blr
entry:
  %cmp55 = icmp sgt i32 %lim, 0
  br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %lim to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %3 = trunc i64 %indvars.iv to i32
  %mul = mul nsw i32 %3, 6
  %idxprom = zext i32 %mul to i64
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom
  %4 = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %add2 = or i32 %mul, 1
  %idxprom3 = zext i32 %add2 to i64
  %arrayidx4 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom3
  %5 = load <16 x i8>, <16 x i8>* %arrayidx4, align 16
  %6 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %4, <16 x i8> %5)
  %add6 = add nuw nsw i32 %mul, 2
  %idxprom7 = zext i32 %add6 to i64
  %arrayidx8 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom7
  %7 = load <16 x i8>, <16 x i8>* %arrayidx8, align 16
  %add10 = add nuw nsw i32 %mul, 3
  %idxprom11 = zext i32 %add10 to i64
  %arrayidx12 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom11
  %8 = load <16 x i8>, <16 x i8>* %arrayidx12, align 16
  %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %7, <16 x i8> %8)
  %add14 = add nuw nsw i32 %mul, 4
  %idxprom15 = zext i32 %add14 to i64
  %arrayidx16 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom15
  %10 = load <16 x i8>, <16 x i8>* %arrayidx16, align 16
  %add18 = add nuw nsw i32 %mul, 5
  %idxprom19 = zext i32 %add18 to i64
  %arrayidx20 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom19
  %11 = load <16 x i8>, <16 x i8>* %arrayidx20, align 16
  %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %2, <16 x i8> %10, <16 x i8> %11)
  %mul21 = mul i64 %indvars.iv, 3
  %idx.ext = and i64 %mul21, 4294967295
  %add.ptr = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 %idx.ext
  store <512 x i1> %6, <512 x i1>* %add.ptr, align 64
  %add.ptr26 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 1
  store <512 x i1> %9, <512 x i1>* %add.ptr26, align 64
  %add.ptr30 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 2
  store <512 x i1> %12, <512 x i1>* %add.ptr30, align 64
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

declare i32 @testRedundantPrimeUnprimeF()
define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind {
; CHECK-LABEL: testRedundantPrimeUnprime:
; CHECK:         .localentry testRedundantPrimeUnprime, 1
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -112(r1)
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    mr r30, r3
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    li r3, 64
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    stxvp vsp4, r1(r3)
; CHECK-NEXT:    li r3, 32
; CHECK-NEXT:    stxvp vsp6, r1(r3)
; CHECK-NEXT:    bl testRedundantPrimeUnprimeF@notoc
; CHECK-NEXT:    li r3, 64
; CHECK-NEXT:    lxvp vsp0, r1(r3)
; CHECK-NEXT:    li r3, 32
; CHECK-NEXT:    lxvp vsp2, r1(r3)
; CHECK-NEXT:    stxv vs0, 112(r30)
; CHECK-NEXT:    stxv vs1, 96(r30)
; CHECK-NEXT:    stxv vs2, 80(r30)
; CHECK-NEXT:    stxv vs3, 64(r30)
; CHECK-NEXT:    addi r1, r1, 112
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testRedundantPrimeUnprime:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    stdu r1, -192(r1)
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    std r30, 176(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT:    mr r30, r3
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    li r3, 112
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    stxvp vsp4, r1(r3)
; CHECK-BE-NEXT:    li r3, 144
; CHECK-BE-NEXT:    stxvp vsp6, r1(r3)
; CHECK-BE-NEXT:    bl testRedundantPrimeUnprimeF
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    li r3, 112
; CHECK-BE-NEXT:    lxvp vsp0, r1(r3)
; CHECK-BE-NEXT:    li r3, 144
; CHECK-BE-NEXT:    lxvp vsp2, r1(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r30)
; CHECK-BE-NEXT:    stxv vs2, 96(r30)
; CHECK-BE-NEXT:    stxv vs1, 80(r30)
; CHECK-BE-NEXT:    stxv vs0, 64(r30)
; CHECK-BE-NEXT:    ld r30, 176(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT:    addi r1, r1, 192
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, <512 x i1>* %dst, align 64
  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %call = tail call signext i32 bitcast (i32 ()* @testRedundantPrimeUnprimeF to i32 ()*)()
  %add.ptr1 = getelementptr inbounds <512 x i1>, <512 x i1>* %dst, i64 1
  store <512 x i1> %1, <512 x i1>* %add.ptr1, align 64
  ret void
}

declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)

; Function Attrs: nounwind
define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvp vsp0, 0(r3)
; CHECK-NEXT:    stxvp vsp0, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvp vsp0, 0(r3)
; CHECK-BE-NEXT:    stxvp vsp0, 0(r4)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
  %2 = bitcast <256 x i1>* %vp2 to i8*
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
  ret void
}

; Function Attrs: argmemonly nounwind readonly
declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)

; Function Attrs: argmemonly nounwind writeonly
declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)

; Function Attrs: nounwind
define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvpx vsp0, r3, r4
; CHECK-NEXT:    stxvpx vsp0, r5, r4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r4
; CHECK-BE-NEXT:    stxvpx vsp0, r5, r4
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 %offset
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 %offset
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 18
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 18
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 18
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 18
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 1
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 1
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 1
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 1
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 42
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_5:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 42
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 42
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 42
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvp vsp0, 4096(r3)
; CHECK-NEXT:    stxvp vsp0, 4096(r4)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_6:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvp vsp0, 4096(r3)
; CHECK-BE-NEXT:    stxvp vsp0, 4096(r4)
; CHECK-BE-NEXT:    blr
entry:
  %0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
  %1 = bitcast <256 x i1>* %0 to i8*
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
  %4 = bitcast <256 x i1>* %3 to i8*
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; FIXME: A prefixed load (plxvp) is expected here as the offset in this
; test case is a constant that fits within 34-bits.
; CHECK-LABEL: test_ldst_7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 0
; CHECK-NEXT:    ori r5, r5, 32799
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_7:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 0
; CHECK-BE-NEXT:    ori r5, r5, 32799
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 32799
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 32799
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nofree nounwind
define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test_ldst_8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    li r3, 8
; CHECK-NEXT:    lxvpx vsp4, r4, r3
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvf64gernn acc0, vsp4, v2, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0,
48(r7) 871; CHECK-NEXT: stxv vs1, 32(r7) 872; CHECK-NEXT: stxv vs2, 16(r7) 873; CHECK-NEXT: stxv vs3, 0(r7) 874; CHECK-NEXT: blr 875; 876; CHECK-BE-LABEL: test_ldst_8: 877; CHECK-BE: # %bb.0: # %entry 878; CHECK-BE-NEXT: lxv vs1, 16(r3) 879; CHECK-BE-NEXT: lxv vs0, 0(r3) 880; CHECK-BE-NEXT: lxv vs3, 48(r3) 881; CHECK-BE-NEXT: lxv vs2, 32(r3) 882; CHECK-BE-NEXT: li r3, 8 883; CHECK-BE-NEXT: lxvpx vsp4, r4, r3 884; CHECK-BE-NEXT: xxmtacc acc0 885; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp4, v2, 0, 0 886; CHECK-BE-NEXT: xxmfacc acc0 887; CHECK-BE-NEXT: stxv vs1, 16(r7) 888; CHECK-BE-NEXT: stxv vs0, 0(r7) 889; CHECK-BE-NEXT: stxv vs3, 48(r7) 890; CHECK-BE-NEXT: stxv vs2, 32(r7) 891; CHECK-BE-NEXT: blr 892entry: 893 %0 = bitcast i8* %vqp to <512 x i1>* 894 %1 = load <512 x i1>, <512 x i1>* %0, align 64 895 %2 = bitcast <256 x i1>* %vpp to i8* 896 %3 = getelementptr i8, i8* %2, i64 8 897 %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3) 898 %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0) 899 %6 = bitcast i8* %resp to <512 x i1>* 900 store <512 x i1> %5, <512 x i1>* %6, align 64 901 ret void 902} 903 904; Function Attrs: nofree nounwind 905define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { 906; CHECK-LABEL: test_ldst_9: 907; CHECK: # %bb.0: # %entry 908; CHECK-NEXT: lxv vs1, 32(r3) 909; CHECK-NEXT: lxv vs0, 48(r3) 910; CHECK-NEXT: lxv vs3, 0(r3) 911; CHECK-NEXT: lxv vs2, 16(r3) 912; CHECK-NEXT: lxvp vsp4, 0(r4) 913; CHECK-NEXT: xxmtacc acc0 914; CHECK-NEXT: xvf64gernp acc0, vsp4, v2 915; CHECK-NEXT: xxmfacc acc0 916; CHECK-NEXT: stxv vs0, 48(r7) 917; CHECK-NEXT: stxv vs1, 32(r7) 918; CHECK-NEXT: stxv vs2, 16(r7) 919; CHECK-NEXT: stxv vs3, 0(r7) 920; CHECK-NEXT: blr 921; 922; CHECK-BE-LABEL: test_ldst_9: 923; CHECK-BE: # %bb.0: # %entry 924; CHECK-BE-NEXT: lxv vs1, 16(r3) 925; CHECK-BE-NEXT: lxv vs0, 0(r3) 926; CHECK-BE-NEXT: lxv vs3, 48(r3) 927; 
CHECK-BE-NEXT: lxv vs2, 32(r3) 928; CHECK-BE-NEXT: lxvp vsp4, 0(r4) 929; CHECK-BE-NEXT: xxmtacc acc0 930; CHECK-BE-NEXT: xvf64gernp acc0, vsp4, v2 931; CHECK-BE-NEXT: xxmfacc acc0 932; CHECK-BE-NEXT: stxv vs1, 16(r7) 933; CHECK-BE-NEXT: stxv vs0, 0(r7) 934; CHECK-BE-NEXT: stxv vs3, 48(r7) 935; CHECK-BE-NEXT: stxv vs2, 32(r7) 936; CHECK-BE-NEXT: blr 937entry: 938 %0 = bitcast i8* %vqp to <512 x i1>* 939 %1 = load <512 x i1>, <512 x i1>* %0, align 64 940 %2 = bitcast <256 x i1>* %vpp to i8* 941 %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2) 942 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) 943 %5 = bitcast i8* %resp to <512 x i1>* 944 store <512 x i1> %4, <512 x i1>* %5, align 64 945 ret void 946} 947 948; Function Attrs: nofree nounwind 949define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { 950; CHECK-LABEL: test_ldst_10: 951; CHECK: # %bb.0: # %entry 952; CHECK-NEXT: lxv vs1, 32(r3) 953; CHECK-NEXT: lxv vs0, 48(r3) 954; CHECK-NEXT: lxv vs3, 0(r3) 955; CHECK-NEXT: lxv vs2, 16(r3) 956; CHECK-NEXT: lxvp vsp4, 0(r5) 957; CHECK-NEXT: xxmtacc acc0 958; CHECK-NEXT: xvf64gernp acc0, vsp4, v2 959; CHECK-NEXT: xxmfacc acc0 960; CHECK-NEXT: stxv vs0, 48(r9) 961; CHECK-NEXT: stxv vs1, 32(r9) 962; CHECK-NEXT: stxv vs2, 16(r9) 963; CHECK-NEXT: stxv vs3, 0(r9) 964; CHECK-NEXT: blr 965; 966; CHECK-BE-LABEL: test_ldst_10: 967; CHECK-BE: # %bb.0: # %entry 968; CHECK-BE-NEXT: lxv vs1, 16(r3) 969; CHECK-BE-NEXT: lxv vs0, 0(r3) 970; CHECK-BE-NEXT: lxv vs3, 48(r3) 971; CHECK-BE-NEXT: lxv vs2, 32(r3) 972; CHECK-BE-NEXT: lxvp vsp4, 0(r5) 973; CHECK-BE-NEXT: xxmtacc acc0 974; CHECK-BE-NEXT: xvf64gernp acc0, vsp4, v2 975; CHECK-BE-NEXT: xxmfacc acc0 976; CHECK-BE-NEXT: stxv vs1, 16(r9) 977; CHECK-BE-NEXT: stxv vs0, 0(r9) 978; CHECK-BE-NEXT: stxv vs3, 48(r9) 979; CHECK-BE-NEXT: stxv vs2, 32(r9) 980; CHECK-BE-NEXT: blr 981entry: 982 %0 = bitcast i8* %vqp to <512 x i1>* 
983 %1 = load <512 x i1>, <512 x i1>* %0, align 64 984 %2 = bitcast <256 x i1>* %vpp to i8* 985 %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2) 986 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) 987 %5 = bitcast i8* %resp to <512 x i1>* 988 store <512 x i1> %4, <512 x i1>* %5, align 64 989 ret void 990} 991