; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8

; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9

; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8-BE

; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9-BE

; Exercises PowerPC code generation for an element-reversing shufflevector
; that is either fed by a vector load (load_swap*) or feeds a vector store
; (swap_store*). Four configurations are checked: pwr8 and pwr9, each for the
; little-endian and the big-endian 64-bit triple.
;
; Function names follow <kind><T><S>:
;   T - vector type: 0 = <2 x i64>,  1 = <4 x i32>,    2 = <8 x i16>,
;                    3 = <16 x i8>,  4 = <2 x double>, 5 = <4 x float>
;   S - shuffle operand whose elements are reversed: 0 = first, 1 = second
; Exception: load_swap40 reverses the second operand (mask <3, 2>), and there
; is no load_swap41.
;
; In the load tests both pointers are loaded in the IR, but the shuffle mask
; only ever selects elements from one of the two loaded vectors.
;
; The assertion lines are generated; regenerate them with the script named in
; the NOTE above instead of editing them by hand.

; ---- Vector load followed by element reversal ----

define <2 x i64> @load_swap00(<2 x i64>* %vp1, <2 x i64>* %vp2) {
; CHECK-P8-LABEL: load_swap00:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    lxvd2x v2, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap00:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvd2x v2, 0, r3
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap00:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    lxvd2x v2, 0, r3
; CHECK-P8-BE-NEXT:    xxswapd v2, v2
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap00:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
; CHECK-P9-BE-NEXT:    xxswapd v2, v2
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <2 x i64>, <2 x i64>* %vp1
  %v2 = load <2 x i64>, <2 x i64>* %vp2
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %v3
}

define <2 x i64> @load_swap01(<2 x i64>* %vp1, <2 x i64>* %vp2) {
; CHECK-P8-LABEL: load_swap01:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    lxvd2x v2, 0, r4
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap01:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvd2x v2, 0, r4
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap01:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    lxvd2x v2, 0, r4
; CHECK-P8-BE-NEXT:    xxswapd v2, v2
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap01:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
; CHECK-P9-BE-NEXT:    xxswapd v2, v2
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <2 x i64>, <2 x i64>* %vp1
  %v2 = load <2 x i64>, <2 x i64>* %vp2
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %v3
}

define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) {
; CHECK-P8-LABEL: load_swap10:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r4
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap10:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvw4x v2, 0, r3
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap10:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap10:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <4 x i32>, <4 x i32>* %vp1
  %v2 = load <4 x i32>, <4 x i32>* %vp2
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %v3
}

define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) {
; CHECK-P8-LABEL: load_swap11:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap11:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvw4x v2, 0, r4
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap11:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap11:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <4 x i32>, <4 x i32>* %vp1
  %v2 = load <4 x i32>, <4 x i32>* %vp2
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  ret <4 x i32> %v3
}

define <8 x i16> @load_swap20(<8 x i16>* %vp1, <8 x i16>* %vp2) {
; CHECK-P8-LABEL: load_swap20:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r4
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap20:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvh8x v2, 0, r3
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap20:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap20:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <8 x i16>, <8 x i16>* %vp1
  %v2 = load <8 x i16>, <8 x i16>* %vp2
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %v3
}

define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2) {
; CHECK-P8-LABEL: load_swap21:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap21:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvh8x v2, 0, r4
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap21:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap21:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <8 x i16>, <8 x i16>* %vp1
  %v2 = load <8 x i16>, <8 x i16>* %vp2
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <8 x i16> %v3
}

define <16 x i8> @load_swap30(<16 x i8>* %vp1, <16 x i8>* %vp2) {
; CHECK-P8-LABEL: load_swap30:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r4
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap30:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvb16x v2, 0, r3
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap30:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap30:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-P9-BE-NEXT:    xxbrq v2, vs0
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <16 x i8>, <16 x i8>* %vp1
  %v2 = load <16 x i8>, <16 x i8>* %vp2
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %v3
}

define <16 x i8> @load_swap31(<16 x i8>* %vp1, <16 x i8>* %vp2) {
; CHECK-P8-LABEL: load_swap31:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    addi r3, r3, .LCPI7_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap31:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvb16x v2, 0, r4
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap31:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap31:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv vs0, 0(r4)
; CHECK-P9-BE-NEXT:    xxbrq v2, vs0
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <16 x i8>, <16 x i8>* %vp1
  %v2 = load <16 x i8>, <16 x i8>* %vp2
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
  ret <16 x i8> %v3
}

; Note: despite the 0 suffix this test reverses the second operand (%v2).
define <2 x double> @load_swap40(<2 x double>* %vp1, <2 x double>* %vp2) {
; CHECK-P8-LABEL: load_swap40:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    lxvd2x v2, 0, r4
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap40:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvd2x v2, 0, r4
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap40:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-BE-NEXT:    xxswapd v2, vs0
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap40:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv vs0, 0(r4)
; CHECK-P9-BE-NEXT:    xxswapd v2, vs0
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <2 x double>, <2 x double>* %vp1
  %v2 = load <2 x double>, <2 x double>* %vp2
  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %v3
}

define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) {
; CHECK-P8-LABEL: load_swap50:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r4, r2, .LCPI9_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    addi r4, r4, .LCPI9_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r4
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap50:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvw4x v2, 0, r3
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap50:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI9_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI9_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap50:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <4 x float>, <4 x float>* %vp1
  %v2 = load <4 x float>, <4 x float>* %vp2
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %v3
}

define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) {
; CHECK-P8-LABEL: load_swap51:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    addi r3, r3, .LCPI10_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: load_swap51:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    lxvw4x v2, 0, r4
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: load_swap51:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: load_swap51:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    blr
  %v1 = load <4 x float>, <4 x float>* %vp1
  %v2 = load <4 x float>, <4 x float>* %vp2
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  ret <4 x float> %v3
}

; ---- Element reversal followed by vector store ----

define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
; CHECK-P8-LABEL: swap_store00:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    stxvd2x v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store00:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvd2x v2, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store00:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    xxswapd vs0, v2
; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store00:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    xxswapd vs0, v2
; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
  store <2 x i64> %v3, <2 x i64>* %vp
  ret void
}

define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
; CHECK-P8-LABEL: swap_store01:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    stxvd2x v3, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store01:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvd2x v3, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store01:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    xxswapd vs0, v3
; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store01:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    xxswapd vs0, v3
; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
  store <2 x i64> %v3, <2 x i64>* %vp
  ret void
}

define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
; CHECK-P8-LABEL: swap_store10:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI13_0@toc@l
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store10:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvw4x v2, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store10:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store10:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %v3, <4 x i32>* %vp
  ret void
}

define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
; CHECK-P8-LABEL: swap_store11:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI14_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store11:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvw4x v3, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store11:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store11:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v2, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  store <4 x i32> %v3, <4 x i32>* %vp
  ret void
}

define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
; CHECK-P8-LABEL: swap_store20:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI15_0@toc@l
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store20:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvh8x v2, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store20:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store20:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %v3, <8 x i16>* %vp
  ret void
}

define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
; CHECK-P8-LABEL: swap_store21:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI16_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store21:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvh8x v3, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store21:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store21:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v2, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  store <8 x i16> %v3, <8 x i16>* %vp
  ret void
}

define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
; CHECK-P8-LABEL: swap_store30:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI17_0@toc@l
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store30:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvb16x v2, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store30:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store30:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    xxbrq vs0, v2
; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %v3, <16 x i8>* %vp
  ret void
}

define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
; CHECK-P8-LABEL: swap_store31:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI18_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store31:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvb16x v3, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store31:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store31:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    xxbrq vs0, v3
; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
  store <16 x i8> %v3, <16 x i8>* %vp
  ret void
}

define void @swap_store40(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
; CHECK-P8-LABEL: swap_store40:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    stxvd2x v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store40:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvd2x v2, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store40:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    xxswapd vs0, v2
; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store40:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    xxswapd vs0, v2
; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 1, i32 0>
  store <2 x double> %v3, <2 x double>* %vp
  ret void
}

define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
; CHECK-P8-LABEL: swap_store41:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    stxvd2x v3, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store41:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvd2x v3, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store41:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    xxswapd vs0, v3
; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store41:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    xxswapd vs0, v3
; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
  store <2 x double> %v3, <2 x double>* %vp
  ret void
}

define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
; CHECK-P8-LABEL: swap_store50:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI21_0@toc@l
; CHECK-P8-NEXT:    lvx v3, 0, r3
; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store50:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvw4x v2, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store50:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store50:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %v3, <4 x float>* %vp
  ret void
}

define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
; CHECK-P8-LABEL: swap_store51:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, .LCPI22_0@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-NEXT:    stvx v2, 0, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: swap_store51:
; CHECK-P9:       # %bb.0:
; CHECK-P9-NEXT:    stxvw4x v3, 0, r7
; CHECK-P9-NEXT:    blr
;
; CHECK-P8-BE-LABEL: swap_store51:
; CHECK-P8-BE:       # %bb.0:
; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
; CHECK-P8-BE-NEXT:    blr
;
; CHECK-P9-BE-LABEL: swap_store51:
; CHECK-P9-BE:       # %bb.0:
; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
; CHECK-P9-BE-NEXT:    lxvx v2, 0, r3
; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
; CHECK-P9-BE-NEXT:    blr
  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  store <4 x float> %v3, <4 x float>* %vp
  ret void
}