; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX2

; Saturating fixed-point scale over four consecutive i32 elements:
; p[i] = min((p[i] * s) >> 15, 255). All targets vectorize this to a
; single <4 x i32> load/store pair.
define void @store_i32(i32* nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i32(
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4, [[TBAA0:!tbaa !.*]]
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1:%.*]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 4, [[TBAA0]]
; CHECK-NEXT:    ret void
;
  %4 = load i32, i32* %0, align 4, !tbaa !2
  %5 = mul i32 %4, %1
  %6 = lshr i32 %5, 15
  %7 = icmp ult i32 %6, 255
  %8 = select i1 %7, i32 %6, i32 255
  store i32 %8, i32* %0, align 4, !tbaa !2
  %9 = getelementptr inbounds i32, i32* %0, i64 1
  %10 = load i32, i32* %9, align 4, !tbaa !2
  %11 = mul i32 %10, %1
  %12 = lshr i32 %11, 15
  %13 = icmp ult i32 %12, 255
  %14 = select i1 %13, i32 %12, i32 255
  store i32 %14, i32* %9, align 4, !tbaa !2
  %15 = getelementptr inbounds i32, i32* %0, i64 2
  %16 = load i32, i32* %15, align 4, !tbaa !2
  %17 = mul i32 %16, %1
  %18 = lshr i32 %17, 15
  %19 = icmp ult i32 %18, 255
  %20 = select i1 %19, i32 %18, i32 255
  store i32 %20, i32* %15, align 4, !tbaa !2
  %21 = getelementptr inbounds i32, i32* %0, i64 3
  %22 = load i32, i32* %21, align 4, !tbaa !2
  %23 = mul i32 %22, %1
  %24 = lshr i32 %23, 15
  %25 = icmp ult i32 %24, 255
  %26 = select i1 %25, i32 %24, i32 255
  store i32 %26, i32* %21, align 4, !tbaa !2
  ret void
}

; The same pattern on i8 elements: each lane is widened to i32 for the
; arithmetic and truncated back to i8 for the store.
define void @store_i8(i8* nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i8(
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP0:%.*]] to <4 x i8>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1, [[TBAA4:!tbaa !.*]]
; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i8> [[TMP5]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1:%.*]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP10]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT:    [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; CHECK-NEXT:    store <4 x i8> [[TMP13]], <4 x i8>* [[TMP14]], align 1, [[TBAA4]]
; CHECK-NEXT:    ret void
;
  %4 = load i8, i8* %0, align 1, !tbaa !6
  %5 = zext i8 %4 to i32
  %6 = mul i32 %5, %1
  %7 = lshr i32 %6, 15
  %8 = icmp ult i32 %7, 255
  %9 = select i1 %8, i32 %7, i32 255
  %10 = trunc i32 %9 to i8
  store i8 %10, i8* %0, align 1, !tbaa !6
  %11 = getelementptr inbounds i8, i8* %0, i64 1
  %12 = load i8, i8* %11, align 1, !tbaa !6
  %13 = zext i8 %12 to i32
  %14 = mul i32 %13, %1
  %15 = lshr i32 %14, 15
  %16 = icmp ult i32 %15, 255
  %17 = select i1 %16, i32 %15, i32 255
  %18 = trunc i32 %17 to i8
  store i8 %18, i8* %11, align 1, !tbaa !6
  %19 = getelementptr inbounds i8, i8* %0, i64 2
  %20 = load i8, i8* %19, align 1, !tbaa !6
  %21 = zext i8 %20 to i32
  %22 = mul i32 %21, %1
  %23 = lshr i32 %22, 15
  %24 = icmp ult i32 %23, 255
  %25 = select i1 %24, i32 %23, i32 255
  %26 = trunc i32 %25 to i8
  store i8 %26, i8* %19, align 1, !tbaa !6
  %27 = getelementptr inbounds i8, i8* %0, i64 3
  %28 = load i8, i8* %27, align 1, !tbaa !6
  %29 = zext i8 %28 to i32
  %30 = mul i32 %29, %1
  %31 = lshr i32 %30, 15
  %32 = icmp ult i32 %31, 255
  %33 = select i1 %32, i32 %31, i32 255
  %34 = trunc i32 %33 to i8
  store i8 %34, i8* %27, align 1, !tbaa !6
  ret void
}

; The same pattern on i64 elements: only the AVX2 and AVX512 runs vectorize
; this; the SSE and AVX1 runs keep it scalar.
define void @store_i64(i64* nocapture %0, i32 %1, i32 %2) {
; SSE-LABEL: @store_i64(
; SSE-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; SSE-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP0:%.*]], align 8, [[TBAA5:!tbaa !.*]]
; SSE-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
; SSE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; SSE-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
; SSE-NEXT:    [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
; SSE-NEXT:    store i64 [[TMP11]], i64* [[TMP0]], align 8, [[TBAA5]]
; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 1
; SSE-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8, [[TBAA5]]
; SSE-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
; SSE-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
; SSE-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; SSE-NEXT:    [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
; SSE-NEXT:    [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
; SSE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
; SSE-NEXT:    store i64 [[TMP19]], i64* [[TMP12]], align 8, [[TBAA5]]
; SSE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
; SSE-NEXT:    [[TMP21:%.*]] = load i64, i64* [[TMP20]], align 8, [[TBAA5]]
; SSE-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
; SSE-NEXT:    [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
; SSE-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
; SSE-NEXT:    [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
; SSE-NEXT:    [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
; SSE-NEXT:    [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
; SSE-NEXT:    store i64 [[TMP27]], i64* [[TMP20]], align 8, [[TBAA5]]
; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 3
; SSE-NEXT:    [[TMP29:%.*]] = load i64, i64* [[TMP28]], align 8, [[TBAA5]]
; SSE-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
; SSE-NEXT:    [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
; SSE-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
; SSE-NEXT:    [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
; SSE-NEXT:    [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
; SSE-NEXT:    [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
; SSE-NEXT:    store i64 [[TMP35]], i64* [[TMP28]], align 8, [[TBAA5]]
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @store_i64(
; AVX1-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; AVX1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP0:%.*]], align 8, [[TBAA5:!tbaa !.*]]
; AVX1-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
; AVX1-NEXT:    [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
; AVX1-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; AVX1-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
; AVX1-NEXT:    [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
; AVX1-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
; AVX1-NEXT:    store i64 [[TMP11]], i64* [[TMP0]], align 8, [[TBAA5]]
; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 1
; AVX1-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8, [[TBAA5]]
; AVX1-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
; AVX1-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
; AVX1-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; AVX1-NEXT:    [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
; AVX1-NEXT:    [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
; AVX1-NEXT:    [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
; AVX1-NEXT:    store i64 [[TMP19]], i64* [[TMP12]], align 8, [[TBAA5]]
; AVX1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
; AVX1-NEXT:    [[TMP21:%.*]] = load i64, i64* [[TMP20]], align 8, [[TBAA5]]
; AVX1-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
; AVX1-NEXT:    [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
; AVX1-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
; AVX1-NEXT:    [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
; AVX1-NEXT:    [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
; AVX1-NEXT:    [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
; AVX1-NEXT:    store i64 [[TMP27]], i64* [[TMP20]], align 8, [[TBAA5]]
; AVX1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 3
; AVX1-NEXT:    [[TMP29:%.*]] = load i64, i64* [[TMP28]], align 8, [[TBAA5]]
; AVX1-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
; AVX1-NEXT:    [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
; AVX1-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
; AVX1-NEXT:    [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
; AVX1-NEXT:    [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
; AVX1-NEXT:    [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
; AVX1-NEXT:    store i64 [[TMP35]], i64* [[TMP28]], align 8, [[TBAA5]]
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @store_i64(
; AVX2-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; AVX2-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP0:%.*]] to <4 x i64>*
; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i64>, <4 x i64>* [[TMP5]], align 8, [[TBAA5:!tbaa !.*]]
; AVX2-NEXT:    [[TMP7:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT:    [[TMP9:%.*]] = mul <4 x i64> [[TMP6]], [[TMP8]]
; AVX2-NEXT:    [[TMP10:%.*]] = lshr <4 x i64> [[TMP9]], <i64 15, i64 15, i64 15, i64 15>
; AVX2-NEXT:    [[TMP11:%.*]] = trunc <4 x i64> [[TMP10]] to <4 x i32>
; AVX2-NEXT:    [[TMP12:%.*]] = icmp ult <4 x i32> [[TMP11]], <i32 255, i32 255, i32 255, i32 255>
; AVX2-NEXT:    [[TMP13:%.*]] = and <4 x i64> [[TMP10]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; AVX2-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i64> [[TMP13]], <4 x i64> <i64 255, i64 255, i64 255, i64 255>
; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i64* [[TMP0]] to <4 x i64>*
; AVX2-NEXT:    store <4 x i64> [[TMP14]], <4 x i64>* [[TMP15]], align 8, [[TBAA5]]
; AVX2-NEXT:    ret void
;
  %4 = zext i32 %1 to i64
  %5 = load i64, i64* %0, align 8, !tbaa !7
  %6 = mul i64 %5, %4
  %7 = lshr i64 %6, 15
  %8 = trunc i64 %7 to i32
  %9 = icmp ult i32 %8, 255
  %10 = and i64 %7, 4294967295
  %11 = select i1 %9, i64 %10, i64 255
  store i64 %11, i64* %0, align 8, !tbaa !7
  %12 = getelementptr inbounds i64, i64* %0, i64 1
  %13 = load i64, i64* %12, align 8, !tbaa !7
  %14 = mul i64 %13, %4
  %15 = lshr i64 %14, 15
  %16 = trunc i64 %15 to i32
  %17 = icmp ult i32 %16, 255
  %18 = and i64 %15, 4294967295
  %19 = select i1 %17, i64 %18, i64 255
  store i64 %19, i64* %12, align 8, !tbaa !7
  %20 = getelementptr inbounds i64, i64* %0, i64 2
  %21 = load i64, i64* %20, align 8, !tbaa !7
  %22 = mul i64 %21, %4
  %23 = lshr i64 %22, 15
  %24 = trunc i64 %23 to i32
  %25 = icmp ult i32 %24, 255
  %26 = and i64 %23, 4294967295
  %27 = select i1 %25, i64 %26, i64 255
  store i64 %27, i64* %20, align 8, !tbaa !7
  %28 = getelementptr inbounds i64, i64* %0, i64 3
  %29 = load i64, i64* %28, align 8, !tbaa !7
  %30 = mul i64 %29, %4
  %31 = lshr i64 %30, 15
  %32 = trunc i64 %31 to i32
  %33 = icmp ult i32 %32, 255
  %34 = and i64 %31, 4294967295
  %35 = select i1 %33, i64 %34, i64 255
  store i64 %35, i64* %28, align 8, !tbaa !7
  ret void
}

!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
!6 = !{!4, !4, i64 0}
!7 = !{!8, !8, i64 0}
!8 = !{!"long", !4, i64 0}