; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s
; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s
; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s

; PR14887
; These tests inject a store into the chain to test the inreg versions of pmovsx

define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
  %wide.load35 = load <2 x i8>* %in, align 1
  %sext = sext <2 x i8> %wide.load35 to <2 x i64>
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %sext, <2 x i64>* %out, align 8
  ret void

; SSE41: test1:
; SSE41: pmovsxbq

; AVX1: test1:
; AVX1: vpmovsxbq

; AVX2: test1:
; AVX2: vpmovsxbq
}

define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
  %wide.load35 = load <4 x i8>* %in, align 1
  %sext = sext <4 x i8> %wide.load35 to <4 x i64>
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %sext, <4 x i64>* %out, align 8
  ret void

; AVX2: test2:
; AVX2: vpmovsxbq
}

define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
  %wide.load35 = load <4 x i8>* %in, align 1
  %sext = sext <4 x i8> %wide.load35 to <4 x i32>
  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
  store <4 x i32> %sext, <4 x i32>* %out, align 8
  ret void

; SSE41: test3:
; SSE41: pmovsxbd

; AVX1: test3:
; AVX1: vpmovsxbd

; AVX2: test3:
; AVX2: vpmovsxbd
}

define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
  %wide.load35 = load <8 x i8>* %in, align 1
  %sext = sext <8 x i8> %wide.load35 to <8 x i32>
  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
  store <8 x i32> %sext, <8 x i32>* %out, align 8
  ret void

; AVX2: test4:
; AVX2: vpmovsxbd
}

define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
  %wide.load35 = load <8 x i8>* %in, align 1
  %sext = sext <8 x i8> %wide.load35 to <8 x i16>
  store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
  store <8 x i16> %sext, <8 x i16>* %out, align 8
  ret void

; SSE41: test5:
; SSE41: pmovsxbw

; AVX1: test5:
; AVX1: vpmovsxbw

; AVX2: test5:
; AVX2: vpmovsxbw
}

define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
  %wide.load35 = load <16 x i8>* %in, align 1
  %sext = sext <16 x i8> %wide.load35 to <16 x i16>
  store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
  store <16 x i16> %sext, <16 x i16>* %out, align 8
  ret void

; AVX2: test6:
; FIXME: v16i8 -> v16i16 is scalarized.
; AVX2-NOT: pmovsx
}

define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
  %wide.load35 = load <2 x i16>* %in, align 1
  %sext = sext <2 x i16> %wide.load35 to <2 x i64>
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %sext, <2 x i64>* %out, align 8
  ret void

; SSE41: test7:
; SSE41: pmovsxwq

; AVX1: test7:
; AVX1: vpmovsxwq

; AVX2: test7:
; AVX2: vpmovsxwq
}

define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
  %wide.load35 = load <4 x i16>* %in, align 1
  %sext = sext <4 x i16> %wide.load35 to <4 x i64>
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %sext, <4 x i64>* %out, align 8
  ret void

; AVX2: test8:
; AVX2: vpmovsxwq
}

define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
  %wide.load35 = load <4 x i16>* %in, align 1
  %sext = sext <4 x i16> %wide.load35 to <4 x i32>
  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
  store <4 x i32> %sext, <4 x i32>* %out, align 8
  ret void

; SSE41: test9:
; SSE41: pmovsxwd

; AVX1: test9:
; AVX1: vpmovsxwd

; AVX2: test9:
; AVX2: vpmovsxwd
}

define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
  %wide.load35 = load <8 x i16>* %in, align 1
  %sext = sext <8 x i16> %wide.load35 to <8 x i32>
  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
  store <8 x i32> %sext, <8 x i32>* %out, align 8
  ret void

; AVX2: test10:
; AVX2: vpmovsxwd
}

define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
  %wide.load35 = load <2 x i32>* %in, align 1
  %sext = sext <2 x i32> %wide.load35 to <2 x i64>
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %sext, <2 x i64>* %out, align 8
  ret void

; SSE41: test11:
; SSE41: pmovsxdq

; AVX1: test11:
; AVX1: vpmovsxdq

; AVX2: test11:
; AVX2: vpmovsxdq
}

define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
  %wide.load35 = load <4 x i32>* %in, align 1
  %sext = sext <4 x i32> %wide.load35 to <4 x i64>
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %sext, <4 x i64>* %out, align 8
  ret void

; AVX2: test12:
; AVX2: vpmovsxdq
}