; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64

define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxbw (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = sext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = sext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxdq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = sext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = zext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = zext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = zext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X64:       ## BB#0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = zext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}