; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s

; Subvector extraction on an AVX-512 (skx) target.
;
; Each function extracts a contiguous run of elements with a shufflevector
; whose index vector selects one aligned 128-bit or 256-bit chunk of the
; source. The index vectors correspond to the immediate of the expected
; vextract* instruction (chunk number), or start at element 0 when the low
; chunk is taken and only a sub-register copy / plain store is expected.

;
; Register-to-register extraction from a 512-bit source.
;

; Elements 16..23 of 32 x i16 form 128-bit chunk 2.
define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <8 x i16> %r1
}

; Chunk 0 is just the xmm sub-register, so no instruction is expected.
define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16_first_element:
; SKX:       ## BB#0:
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %r1
}

; Elements 32..47 of 64 x i8 form 128-bit chunk 2.
define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
  ret <16 x i8> %r1
}

; Chunk 0 is just the xmm sub-register, so no instruction is expected.
define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8_first_element:
; SKX:       ## BB#0:
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %r1
}

; Upper 256-bit half of 32 x i16.
define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector256_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %r1
}

; Upper 256-bit half of 64 x i8.
define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector256_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <32 x i8> %r1
}

;
; Upper 128-bit half of a 256-bit source, stored unaligned: the extract is
; expected to fold into the store.
;

define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f64_store:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vextractf64x2 $1, %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f32_store:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vextractf32x4 $1, %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i64_store:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vextracti64x2 $1, %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i32_store:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vextracti32x4 $1, %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i16_store:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vextracti32x4 $1, %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v32i8_store:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vextracti32x4 $1, %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

;
; Low 128-bit half of a 256-bit source, stored unaligned: a plain xmm store
; is expected, with no extract instruction.
;

define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f64_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovupd %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f32_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu64 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

;
; Low 128 bits of a 512-bit source, stored unaligned: a plain xmm store is
; expected.
;

define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2f64_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovupd %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f32_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu64 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

;
; Low 256 bits of a 512-bit source, stored unaligned: a plain ymm store is
; expected.
;

define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovupd %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 1
  ret void
}

define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 1
  ret void
}

define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu64 %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 1
  ret void
}

define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 1
  ret void
}

define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    vmovdqu32 %ymm0, (%rdi)
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 1
  ret void
}