; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxbw (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxbw:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxbd (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxbd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxbq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxbq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxbq (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxwd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxwd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxwq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxwq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxwq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxwq (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxdq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxdq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxdq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxbw:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = zext <8 x i8> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxbd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i8> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxbq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %3 = zext <2 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxwd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxwd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxwq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxwq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %3 = zext <2 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxdq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxdq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = zext <2 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}