; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64

; Tests that shuffles of two horizontal-op / pack results which select the
; low (unpackl) or high (unpackh) halves of each input fold into a single
; hadd/hsub/phadd/phsub/pack instruction on the appropriate operands.

;
; 128-bit Vectors
;

define <4 x float> @test_unpackl_fhadd_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
; X32-LABEL: test_unpackl_fhadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddps %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddps %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a2, <4 x float> %a3)
  %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %3
}

define <2 x double> @test_unpackh_fhadd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
; X32-LABEL: test_unpackh_fhadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddpd %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddpd %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a2, <2 x double> %a3)
  %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %3
}

define <2 x double> @test_unpackl_fhsub_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
; X32-LABEL: test_unpackl_fhsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubpd %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubpd %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a2, <2 x double> %a3)
  %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %3
}

define <4 x float> @test_unpackh_fhsub_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
; X32-LABEL: test_unpackh_fhsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubps %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubps %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a2, <4 x float> %a3)
  %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x float> %3
}

define <8 x i16> @test_unpackl_hadd_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackl_hadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddw %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddw %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a2, <8 x i16> %a3)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %3
}

define <4 x i32> @test_unpackh_hadd_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackh_hadd_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddd %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hadd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddd %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a2, <4 x i32> %a3)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x i32> %3
}

define <4 x i32> @test_unpackl_hsub_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackl_hsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubd %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubd %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a2, <4 x i32> %a3)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %3
}

define <8 x i16> @test_unpackh_hsub_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackh_hsub_128:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubw %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hsub_128:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubw %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a2, <8 x i16> %a3)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %3
}

define <16 x i8> @test_unpackl_packss_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackl_packss_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packss_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %3
}

define <8 x i16> @test_unpackh_packss_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackh_packss_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpackssdw %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packss_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpackssdw %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a2, <4 x i32> %a3)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %3
}

define <8 x i16> @test_unpackl_packus_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; X32-LABEL: test_unpackl_packus_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packus_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %3
}

define <16 x i8> @test_unpackh_packus_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; X32-LABEL: test_unpackh_packus_128:
; X32:       ## %bb.0:
; X32-NEXT:    vpackuswb %xmm3, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packus_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpackuswb %xmm3, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %3
}

;
; 256-bit Vectors
;

define <8 x float> @test_unpackl_fhadd_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> %a3) {
; X32-LABEL: test_unpackl_fhadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddps %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddps %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a2, <8 x float> %a3)
  %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
  ret <8 x float> %3
}

define <4 x double> @test_unpackh_fhadd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> %a3) {
; X32-LABEL: test_unpackh_fhadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhaddpd %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhaddpd %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a2, <4 x double> %a3)
  %3 = shufflevector <4 x double> %1, <4 x double> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %3
}

define <4 x double> @test_unpackl_fhsub_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> %a3) {
; X32-LABEL: test_unpackl_fhsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubpd %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_fhsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubpd %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a2, <4 x double> %a3)
  %3 = shufflevector <4 x double> %1, <4 x double> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %3
}

define <8 x float> @test_unpackh_fhsub_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> %a3) {
; X32-LABEL: test_unpackh_fhsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vhsubps %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_fhsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vhsubps %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  %2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a2, <8 x float> %a3)
  %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
  ret <8 x float> %3
}

define <16 x i16> @test_unpackl_hadd_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackl_hadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddw %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddw %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a2, <16 x i16> %a3)
  %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27>
  ret <16 x i16> %3
}

define <8 x i32> @test_unpackh_hadd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackh_hadd_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphaddd %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hadd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddd %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
  %2 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a2, <8 x i32> %a3)
  %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
  ret <8 x i32> %3
}

define <8 x i32> @test_unpackl_hsub_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackl_hsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubd %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_hsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubd %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
  %2 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a2, <8 x i32> %a3)
  %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
  ret <8 x i32> %3
}

define <16 x i16> @test_unpackh_hsub_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackh_hsub_256:
; X32:       ## %bb.0:
; X32-NEXT:    vphsubw %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_hsub_256:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubw %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
  %2 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a2, <16 x i16> %a3)
  %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %3
}

define <32 x i8> @test_unpackl_packss_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackl_packss_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packss_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
  %2 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a2, <16 x i16> %a3)
  %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
  ret <32 x i8> %3
}

define <16 x i16> @test_unpackh_packss_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackh_packss_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpackssdw %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packss_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpackssdw %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
  %2 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a2, <8 x i32> %a3)
  %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %3
}

define <16 x i16> @test_unpackl_packus_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; X32-LABEL: test_unpackl_packus_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpackusdw %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackl_packus_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpackusdw %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
  %2 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a2, <8 x i32> %a3)
  %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27>
  ret <16 x i16> %3
}

; NOTE(review): despite its name, this test calls @llvm.x86.avx2.packsswb
; (signed saturation) and its CHECK lines expect vpacksswb; it duplicates
; test_unpackl_packss_256's coverage, and @llvm.x86.avx2.packuswb below is
; declared but never used. Possibly a copy-paste slip when the test was
; written — confirm intent and regenerate assertions with
; update_llc_test_checks.py before changing the intrinsic.
define <32 x i8> @test_unpackh_packus_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; X32-LABEL: test_unpackh_packus_256:
; X32:       ## %bb.0:
; X32-NEXT:    vpacksswb %ymm3, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_unpackh_packus_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpacksswb %ymm3, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
  %2 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a2, <16 x i16> %a3)
  %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <32 x i8> %3
}

declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)

declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>)

declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)

declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>)

declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)