; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

; Check constant loads of every 128-bit and 256-bit vector type
; for size optimization using splat ops available with AVX and AVX2.
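;
; Functions tagged #0/#1 carry the optsize/minsize attributes defined at the
; end of the file; the *_pgso variants instead attach !prof !14 (a zero
; function_entry_count), which marks them cold under the module's profile
; summary and enables profile-guided size optimization. The size win is mainly
; in the constant pool: a broadcast reads a single scalar element instead of a
; full 16- or 32-byte vector constant.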

; There is no AVX broadcast from a double to a 128-bit vector because movddup
; has been around since SSE3 (grrr).
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <2 x double> @splat_v2f64_pgso(<2 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v2f64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x double> @splat_v4f64_pgso(<4 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v4f64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <4 x float> @splat_v4f32_pgso(<4 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v4f32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

define <8 x float> @splat_v8f32_pgso(<8 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v8f32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
; AVX2 has a native integer broadcast and uses vpbroadcastq, as the checks
; below show.
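; Reinterpreting the integer data through a float-domain broadcast is
; bit-exact: vmovddup just replicates a raw 64-bit lane from memory.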
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
; AVX-LABEL: splat_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <2 x i64> %x, <i64 2, i64 2>
  ret <2 x i64> %add
}

define <2 x i64> @splat_v2i64_pgso(<2 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v2i64_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v2i64_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <2 x i64> %x, <i64 2, i64 2>
  ret <2 x i64> %add
}

; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat 64-bit value.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
; AVX-LABEL: splat_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT:    # xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %add
}

define <4 x i64> @splat_v4i64_pgso(<4 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v4i64_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT:    # xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %add
}

define <4 x i32> @splat_v4i32_pgso(<4 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v4i32_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %add
}

define <8 x i32> @splat_v8i32_pgso(<8 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v8i32_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
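; Without a broadcast, the AVX run below just folds the full 16-byte
; constant-pool load into vpaddw.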
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %add
}

define <8 x i16> @splat_v8i16_pgso(<8 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v8i16_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %add
}

define <16 x i16> @splat_v16i16_pgso(<16 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v16i16_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %add
}

define <16 x i8> @splat_v16i8_pgso(<16 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v16i8_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
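; In the split 256-bit sequence below, the 16-byte constant is loaded once
; (xmm2) and reused for both halves.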
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %add
}

define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v32i8_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %add
}

; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.
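; Under minsize, the i64 constant 1 is materialized with pushq $1 / popq %rax
; (3 bytes) rather than a longer mov immediate.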

@A = common global <3 x i64> zeroinitializer, align 32

define <8 x i64> @pr23259() #1 {
; AVX-LABEL: pr23259:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq $1
; AVX-NEXT:    .cfi_adjust_cfa_offset 8
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_adjust_cfa_offset -8
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX-NEXT:    retq
;
; AVX2-LABEL: pr23259:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovdqa {{.*}}(%rip), %ymm0
; AVX2-NEXT:    pushq $1
; AVX2-NEXT:    .cfi_adjust_cfa_offset 8
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    .cfi_adjust_cfa_offset -8
; AVX2-NEXT:    vmovq %rax, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,1,1]
; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
  %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
  %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

attributes #0 = { optsize }
attributes #1 = { minsize }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}