; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+avx512vl,avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl,avx512bw | FileCheck %s --check-prefix=X64

define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbw (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = sext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = sext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxdq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = sext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = zext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = zext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = zext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = zext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}