; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2

define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; AVX: sext_8i16_to_8i32
; AVX: vpmovsxwd

  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; AVX: sext_4i32_to_4i64
; AVX: vpmovsxdq

  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}

; AVX: load_sext_test1
; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test1
; SSSE3: movq
; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSSE3: psrad $16
; SSSE3: ret

; SSE2: load_sext_test1
; SSE2: movq
; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSE2: psrad $16
; SSE2: ret
define <4 x i32> @load_sext_test1(<4 x i16>* %ptr) {
  %X = load <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i32>
  ret <4 x i32> %Y
}

; AVX: load_sext_test2
; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test2
; SSSE3: movd
; SSSE3: pshufb
; SSSE3: psrad $24
; SSSE3: ret

; SSE2: load_sext_test2
; SSE2: movl
; SSE2: psrad $24
; SSE2: ret
define <4 x i32> @load_sext_test2(<4 x i8>* %ptr) {
  %X = load <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i32>
  ret <4 x i32> %Y
}

; AVX: load_sext_test3
; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test3
; SSSE3: movsbq
; SSSE3: movsbq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test3
; SSE2: movsbq
; SSE2: movsbq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test3(<2 x i8>* %ptr) {
  %X = load <2 x i8>* %ptr
  %Y = sext <2 x i8> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test4
; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test4
; SSSE3: movswq
; SSSE3: movswq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test4
; SSE2: movswq
; SSE2: movswq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test4(<2 x i16>* %ptr) {
  %X = load <2 x i16>* %ptr
  %Y = sext <2 x i16> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test5
; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test5
; SSSE3: movslq
; SSSE3: movslq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test5
; SSE2: movslq
; SSE2: movslq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test5(<2 x i32>* %ptr) {
  %X = load <2 x i32>* %ptr
  %Y = sext <2 x i32> %X to <2 x i64>
  ret <2 x i64> %Y
}

; AVX: load_sext_test6
; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test6
; SSSE3: movq
; SSSE3: punpcklbw
; SSSE3: psraw $8
; SSSE3: ret

; SSE2: load_sext_test6
; SSE2: movq
; SSE2: punpcklbw
; SSE2: psraw $8
; SSE2: ret
define <8 x i16> @load_sext_test6(<8 x i8>* %ptr) {
  %X = load <8 x i8>* %ptr
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16> %Y
}

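; Sign extension from <4 x i1> and <4 x i8> has no single pmovsx form: the
; payload is shifted into the sign position of each 32-bit lane and shifted
; back arithmetically (vpslld/vpsrad by 31 or 24), then widened to <4 x i64>.
; AVX1 has no 256-bit integer operations, so the widening is done as two
; 128-bit vpmovsxdq instructions, as the CHECK lines below assert.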
; AVX: sext_4i1_to_4i64
; AVX: vpslld $31
; AVX: vpsrad $31
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
  %extmask = sext <4 x i1> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

; AVX: sext_4i8_to_4i64
; AVX: vpslld $24
; AVX: vpsrad $24
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}