; RUN: opt -S -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=GENERIC
; RUN: opt -S -mcpu=kryo -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=KRYO

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These tests check that we vectorize the index calculations in the
; gather-reduce pattern shown below. We check cases having i32 and i64
; subtraction.
;
; int gather_reduce_8x16(short *a, short *b, short *g, int n) {
;   int sum = 0;
;   for (int i = 0; i < n ; ++i) {
;     sum += g[*a++ - b[0]]; sum += g[*a++ - b[1]];
;     sum += g[*a++ - b[2]]; sum += g[*a++ - b[3]];
;     sum += g[*a++ - b[4]]; sum += g[*a++ - b[5]];
;     sum += g[*a++ - b[6]]; sum += g[*a++ - b[7]];
;   }
;   return sum;
; }

; Variant 1: the index subtraction is performed in i32 and the i32 result is
; used directly as the getelementptr index into %g.
;
; Only the first run line (prefix GENERIC, no -mcpu) has expectations for this
; function. They require an <8 x i16> load that is zero-extended to <8 x i32>,
; a single <8 x i32> 'sub nsw', and lanes that are extracted and sign-extended
; to i64.
;
; GENERIC-LABEL: @gather_reduce_8x16_i32
;
; GENERIC: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
; GENERIC: zext <8 x i16> [[L]] to <8 x i32>
; GENERIC: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
; GENERIC: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
; GENERIC: sext i32 [[X]] to i64
;
define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
entry:
  ; Skip the loop entirely when n <= 0; the function then returns 0.
  %cmp.99 = icmp sgt i32 %n, 0
  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  ; Loop-carried state: iteration counter, running sum, and the cursor into
  ; %a, which advances by 8 elements per iteration (%incdec.ptr58).
  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  ; Lane 0: sum += g[a[0] - b[0]]
  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
  %0 = load i16, i16* %a.addr.0101, align 2
  %conv = zext i16 %0 to i32
  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
  %1 = load i16, i16* %b, align 2
  %conv2 = zext i16 %1 to i32
  %sub = sub nsw i32 %conv, %conv2
  %arrayidx = getelementptr inbounds i16, i16* %g, i32 %sub
  %2 = load i16, i16* %arrayidx, align 2
  %conv3 = zext i16 %2 to i32
  %add = add nsw i32 %conv3, %sum.0102
  ; Lane 1: sum += g[a[1] - b[1]]
  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
  %3 = load i16, i16* %incdec.ptr, align 2
  %conv5 = zext i16 %3 to i32
  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
  %4 = load i16, i16* %incdec.ptr1, align 2
  %conv7 = zext i16 %4 to i32
  %sub8 = sub nsw i32 %conv5, %conv7
  %arrayidx10 = getelementptr inbounds i16, i16* %g, i32 %sub8
  %5 = load i16, i16* %arrayidx10, align 2
  %conv11 = zext i16 %5 to i32
  %add12 = add nsw i32 %add, %conv11
  ; Lane 2: sum += g[a[2] - b[2]]
  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
  %6 = load i16, i16* %incdec.ptr4, align 2
  %conv14 = zext i16 %6 to i32
  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
  %7 = load i16, i16* %incdec.ptr6, align 2
  %conv16 = zext i16 %7 to i32
  %sub17 = sub nsw i32 %conv14, %conv16
  %arrayidx19 = getelementptr inbounds i16, i16* %g, i32 %sub17
  %8 = load i16, i16* %arrayidx19, align 2
  %conv20 = zext i16 %8 to i32
  %add21 = add nsw i32 %add12, %conv20
  ; Lane 3: sum += g[a[3] - b[3]]
  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
  %9 = load i16, i16* %incdec.ptr13, align 2
  %conv23 = zext i16 %9 to i32
  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
  %10 = load i16, i16* %incdec.ptr15, align 2
  %conv25 = zext i16 %10 to i32
  %sub26 = sub nsw i32 %conv23, %conv25
  %arrayidx28 = getelementptr inbounds i16, i16* %g, i32 %sub26
  %11 = load i16, i16* %arrayidx28, align 2
  %conv29 = zext i16 %11 to i32
  %add30 = add nsw i32 %add21, %conv29
  ; Lane 4: sum += g[a[4] - b[4]]
  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
  %12 = load i16, i16* %incdec.ptr22, align 2
  %conv32 = zext i16 %12 to i32
  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
  %13 = load i16, i16* %incdec.ptr24, align 2
  %conv34 = zext i16 %13 to i32
  %sub35 = sub nsw i32 %conv32, %conv34
  %arrayidx37 = getelementptr inbounds i16, i16* %g, i32 %sub35
  %14 = load i16, i16* %arrayidx37, align 2
  %conv38 = zext i16 %14 to i32
  %add39 = add nsw i32 %add30, %conv38
  ; Lane 5: sum += g[a[5] - b[5]]
  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
  %15 = load i16, i16* %incdec.ptr31, align 2
  %conv41 = zext i16 %15 to i32
  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
  %16 = load i16, i16* %incdec.ptr33, align 2
  %conv43 = zext i16 %16 to i32
  %sub44 = sub nsw i32 %conv41, %conv43
  %arrayidx46 = getelementptr inbounds i16, i16* %g, i32 %sub44
  %17 = load i16, i16* %arrayidx46, align 2
  %conv47 = zext i16 %17 to i32
  %add48 = add nsw i32 %add39, %conv47
  ; Lane 6: sum += g[a[6] - b[6]]
  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
  %18 = load i16, i16* %incdec.ptr40, align 2
  %conv50 = zext i16 %18 to i32
  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
  %19 = load i16, i16* %incdec.ptr42, align 2
  %conv52 = zext i16 %19 to i32
  %sub53 = sub nsw i32 %conv50, %conv52
  %arrayidx55 = getelementptr inbounds i16, i16* %g, i32 %sub53
  %20 = load i16, i16* %arrayidx55, align 2
  %conv56 = zext i16 %20 to i32
  %add57 = add nsw i32 %add48, %conv56
  ; Lane 7: sum += g[a[7] - b[7]]; %incdec.ptr58 is the %a cursor for the
  ; next iteration.
  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
  %21 = load i16, i16* %incdec.ptr49, align 2
  %conv59 = zext i16 %21 to i32
  %22 = load i16, i16* %incdec.ptr51, align 2
  %conv61 = zext i16 %22 to i32
  %sub62 = sub nsw i32 %conv59, %conv61
  %arrayidx64 = getelementptr inbounds i16, i16* %g, i32 %sub62
  %23 = load i16, i16* %arrayidx64, align 2
  %conv65 = zext i16 %23 to i32
  %add66 = add nsw i32 %add57, %conv65
  ; Loop latch: exit once the counter reaches n.
  %inc = add nuw nsw i32 %i.0103, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Variant 2: the same loop, but the loaded i16 values are zero-extended to i64
; and the index subtraction is performed in i64.
;
; Only the second run line (prefix KRYO, -mcpu=kryo) has expectations for this
; function. Although the scalar IR subtracts in i64, the expectations require
; the vectorized subtraction to be an <8 x i32> 'sub nsw' whose extracted lanes
; are sign-extended to i64.
;
; KRYO-LABEL: @gather_reduce_8x16_i64
;
; KRYO: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
; KRYO: zext <8 x i16> [[L]] to <8 x i32>
; KRYO: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
; KRYO: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
; KRYO: sext i32 [[X]] to i64
;
define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
entry:
  ; Skip the loop entirely when n <= 0; the function then returns 0.
  %cmp.99 = icmp sgt i32 %n, 0
  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  ; Loop-carried state: iteration counter, running sum, and the cursor into
  ; %a, which advances by 8 elements per iteration (%incdec.ptr58).
  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  ; Lane 0: sum += g[a[0] - b[0]]
  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
  %0 = load i16, i16* %a.addr.0101, align 2
  %conv = zext i16 %0 to i64
  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
  %1 = load i16, i16* %b, align 2
  %conv2 = zext i16 %1 to i64
  %sub = sub nsw i64 %conv, %conv2
  %arrayidx = getelementptr inbounds i16, i16* %g, i64 %sub
  %2 = load i16, i16* %arrayidx, align 2
  %conv3 = zext i16 %2 to i32
  %add = add nsw i32 %conv3, %sum.0102
  ; Lane 1: sum += g[a[1] - b[1]]
  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
  %3 = load i16, i16* %incdec.ptr, align 2
  %conv5 = zext i16 %3 to i64
  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
  %4 = load i16, i16* %incdec.ptr1, align 2
  %conv7 = zext i16 %4 to i64
  %sub8 = sub nsw i64 %conv5, %conv7
  %arrayidx10 = getelementptr inbounds i16, i16* %g, i64 %sub8
  %5 = load i16, i16* %arrayidx10, align 2
  %conv11 = zext i16 %5 to i32
  %add12 = add nsw i32 %add, %conv11
  ; Lane 2: sum += g[a[2] - b[2]]
  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
  %6 = load i16, i16* %incdec.ptr4, align 2
  %conv14 = zext i16 %6 to i64
  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
  %7 = load i16, i16* %incdec.ptr6, align 2
  %conv16 = zext i16 %7 to i64
  %sub17 = sub nsw i64 %conv14, %conv16
  %arrayidx19 = getelementptr inbounds i16, i16* %g, i64 %sub17
  %8 = load i16, i16* %arrayidx19, align 2
  %conv20 = zext i16 %8 to i32
  %add21 = add nsw i32 %add12, %conv20
  ; Lane 3: sum += g[a[3] - b[3]]
  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
  %9 = load i16, i16* %incdec.ptr13, align 2
  %conv23 = zext i16 %9 to i64
  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
  %10 = load i16, i16* %incdec.ptr15, align 2
  %conv25 = zext i16 %10 to i64
  %sub26 = sub nsw i64 %conv23, %conv25
  %arrayidx28 = getelementptr inbounds i16, i16* %g, i64 %sub26
  %11 = load i16, i16* %arrayidx28, align 2
  %conv29 = zext i16 %11 to i32
  %add30 = add nsw i32 %add21, %conv29
  ; Lane 4: sum += g[a[4] - b[4]]
  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
  %12 = load i16, i16* %incdec.ptr22, align 2
  %conv32 = zext i16 %12 to i64
  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
  %13 = load i16, i16* %incdec.ptr24, align 2
  %conv34 = zext i16 %13 to i64
  %sub35 = sub nsw i64 %conv32, %conv34
  %arrayidx37 = getelementptr inbounds i16, i16* %g, i64 %sub35
  %14 = load i16, i16* %arrayidx37, align 2
  %conv38 = zext i16 %14 to i32
  %add39 = add nsw i32 %add30, %conv38
  ; Lane 5: sum += g[a[5] - b[5]]
  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
  %15 = load i16, i16* %incdec.ptr31, align 2
  %conv41 = zext i16 %15 to i64
  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
  %16 = load i16, i16* %incdec.ptr33, align 2
  %conv43 = zext i16 %16 to i64
  %sub44 = sub nsw i64 %conv41, %conv43
  %arrayidx46 = getelementptr inbounds i16, i16* %g, i64 %sub44
  %17 = load i16, i16* %arrayidx46, align 2
  %conv47 = zext i16 %17 to i32
  %add48 = add nsw i32 %add39, %conv47
  ; Lane 6: sum += g[a[6] - b[6]]
  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
  %18 = load i16, i16* %incdec.ptr40, align 2
  %conv50 = zext i16 %18 to i64
  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
  %19 = load i16, i16* %incdec.ptr42, align 2
  %conv52 = zext i16 %19 to i64
  %sub53 = sub nsw i64 %conv50, %conv52
  %arrayidx55 = getelementptr inbounds i16, i16* %g, i64 %sub53
  %20 = load i16, i16* %arrayidx55, align 2
  %conv56 = zext i16 %20 to i32
  %add57 = add nsw i32 %add48, %conv56
  ; Lane 7: sum += g[a[7] - b[7]]; %incdec.ptr58 is the %a cursor for the
  ; next iteration.
  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
  %21 = load i16, i16* %incdec.ptr49, align 2
  %conv59 = zext i16 %21 to i64
  %22 = load i16, i16* %incdec.ptr51, align 2
  %conv61 = zext i16 %22 to i64
  %sub62 = sub nsw i64 %conv59, %conv61
  %arrayidx64 = getelementptr inbounds i16, i16* %g, i64 %sub62
  %23 = load i16, i16* %arrayidx64, align 2
  %conv65 = zext i16 %23 to i32
  %add66 = add nsw i32 %add57, %conv65
  ; Loop latch: exit once the counter reaches n.
  %inc = add nuw nsw i32 %i.0103, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}