; RUN: opt -S -loop-vectorize -dce -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s

; Checks that the loop vectorizer turns scalar compare+select min/max loops
; into vector reductions at a forced vector width of 2. Each positive test
; expects one vector compare/select pair in the loop body and a second pair
; after the middle.block label (the final reduction of the vector accumulator).

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Input arrays read by the integer, float and double tests respectively.
@A = common global [1024 x i32] zeroinitializer, align 16
@fA = common global [1024 x float] zeroinitializer, align 16
@dA = common global [1024 x double] zeroinitializer, align 16

; Signed tests.

; Turn this into a max reduction. Make sure we use a splat to initialize the
; vector for the reduction.
; CHECK-LABEL: @max_red(
; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>

define i32 @max_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x > acc) ? x : acc  -- signed max.
  %cmp3 = icmp sgt i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Turn this into a max reduction. The compare is written with its operands
; swapped (acc < x) while the select keeps the same operand order, therefore
; this is still a max reduction.
; CHECK-LABEL: @max_red_inverse_select(
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>

define i32 @max_red_inverse_select(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (acc < x) ? x : acc  -- signed max, compare operands swapped.
  %cmp3 = icmp slt i32 %max.red.08, %0
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Turn this into a min reduction.
; CHECK-LABEL: @min_red(
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>

define i32 @min_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x < acc) ? x : acc  -- signed min.
  %cmp3 = icmp slt i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Turn this into a min reduction. The compare is written with its operands
; swapped (acc > x) while the select keeps the same operand order, therefore
; this is still a min reduction.
; CHECK-LABEL: @min_red_inverse_select(
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>

define i32 @min_red_inverse_select(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (acc > x) ? x : acc  -- signed min, compare operands swapped.
  %cmp3 = icmp sgt i32 %max.red.08, %0
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Unsigned tests.

; Turn this into a max reduction.
; CHECK-LABEL: @umax_red(
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>

define i32 @umax_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x >u acc) ? x : acc  -- unsigned max.
  %cmp3 = icmp ugt i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Turn this into a max reduction. The compare is written with its operands
; swapped (acc <u x) while the select keeps the same operand order, therefore
; this is still a max reduction.
; CHECK-LABEL: @umax_red_inverse_select(
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>

define i32 @umax_red_inverse_select(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (acc <u x) ? x : acc  -- unsigned max, compare operands swapped.
  %cmp3 = icmp ult i32 %max.red.08, %0
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Turn this into a min reduction.
; CHECK-LABEL: @umin_red(
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>

define i32 @umin_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x <u acc) ? x : acc  -- unsigned min.
  %cmp3 = icmp ult i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Turn this into a min reduction. The compare is written with its operands
; swapped (acc >u x) while the select keeps the same operand order, therefore
; this is still a min reduction.
; CHECK-LABEL: @umin_red_inverse_select(
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>

define i32 @umin_red_inverse_select(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (acc >u x) ? x : acc  -- unsigned min, compare operands swapped.
  %cmp3 = icmp ugt i32 %max.red.08, %0
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; SGE -> SLT
; Turn this into a min reduction (select inputs are reversed).
; CHECK-LABEL: @sge_min_red(
; CHECK: icmp sge <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>

define i32 @sge_min_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x >= acc) ? acc : x  -- signed min via non-strict compare.
  %cmp3 = icmp sge i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; SLE -> SGT
; Turn this into a max reduction (select inputs are reversed).
; Note: despite the "_min_" in the function name below, the pattern computes a
; signed max, which is why the final reduction is expected to use sgt.
; CHECK-LABEL: @sle_min_red(
; CHECK: icmp sle <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>

define i32 @sle_min_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x <= acc) ? acc : x  -- signed max via non-strict compare.
  %cmp3 = icmp sle i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; UGE -> ULT
; Turn this into a min reduction (select inputs are reversed).
; CHECK-LABEL: @uge_min_red(
; CHECK: icmp uge <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>

define i32 @uge_min_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x >=u acc) ? acc : x  -- unsigned min via non-strict compare.
  %cmp3 = icmp uge i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; ULE -> UGT
; Turn this into a max reduction (select inputs are reversed).
; Note: despite the "_min_" in the function name below, the pattern computes an
; unsigned max, which is why the final reduction is expected to use ugt.
; CHECK-LABEL: @ule_min_red(
; CHECK: icmp ule <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>

define i32 @ule_min_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; acc = (x <=u acc) ? acc : x  -- unsigned max via non-strict compare.
  %cmp3 = icmp ule i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; No reduction.
; The compare below does not involve the accumulator, so this is not a
; min/max pattern. The negative pattern needs a wildcard for the predicate:
; a bare "icmp <2 x i32>" can never match because a predicate is always
; printed between the opcode and the type.
; CHECK-LABEL: @no_red_1(
; CHECK-NOT: icmp {{.*}} <2 x i32>
define i32 @no_red_1(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %1 = load i32, i32* %arrayidx1, align 4
  ; acc = (x > y) ? x : acc  -- compares two loaded values, not the accumulator.
  %cmp3 = icmp sgt i32 %0, %1
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; The select below picks between the two loaded values instead of keeping the
; accumulator, so this is not a min/max pattern either.
; CHECK-LABEL: @no_red_2(
; CHECK-NOT: icmp {{.*}} <2 x i32>
define i32 @no_red_2(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %1 = load i32, i32* %arrayidx1, align 4
  ; acc = (x > acc) ? x : y  -- the false operand is y, not the accumulator.
  %cmp3 = icmp sgt i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Float tests.

; Maximum.

; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @max_red_float(
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ogt acc) ? x : acc
  %cmp3 = fcmp fast ogt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max with a non-strict ordered compare; the final reduction still uses ogt.
; CHECK-LABEL: @max_red_float_ge(
; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @max_red_float_ge(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x oge acc) ? x : acc
  %cmp3 = fcmp fast oge float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max written with the inverse predicate and the select operands swapped.
; CHECK-LABEL: @inverted_max_red_float(
; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x olt acc) ? acc : x
  %cmp3 = fcmp fast olt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Inverted max with a non-strict ordered compare.
; CHECK-LABEL: @inverted_max_red_float_le(
; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_max_red_float_le(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ole acc) ? acc : x
  %cmp3 = fcmp fast ole float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max with an unordered compare; the final reduction is expected to use ogt.
; CHECK-LABEL: @unordered_max_red_float(
; CHECK: fcmp fast ugt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @unordered_max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ugt acc) ? x : acc
  %cmp3 = fcmp fast ugt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max with a non-strict unordered compare.
; CHECK-LABEL: @unordered_max_red_float_ge(
; CHECK: fcmp fast uge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @unordered_max_red_float_ge(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x uge acc) ? x : acc
  %cmp3 = fcmp fast uge float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Inverted max with an unordered compare.
; CHECK-LABEL: @inverted_unordered_max_red_float(
; CHECK: fcmp fast ult <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_unordered_max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ult acc) ? acc : x
  %cmp3 = fcmp fast ult float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Inverted max with a non-strict unordered compare.
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
; CHECK: fcmp fast ule <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_unordered_max_red_float_le(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ule acc) ? acc : x
  %cmp3 = fcmp fast ule float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Minimum.

; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @min_red_float(
; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x olt acc) ? x : acc
  %cmp3 = fcmp fast olt float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min with a non-strict ordered compare; the final reduction still uses olt.
; CHECK-LABEL: @min_red_float_le(
; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @min_red_float_le(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ole acc) ? x : acc
  %cmp3 = fcmp fast ole float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min written with the inverse predicate and the select operands swapped.
; CHECK-LABEL: @inverted_min_red_float(
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ogt acc) ? acc : x
  %cmp3 = fcmp fast ogt float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Inverted min with a non-strict ordered compare.
; CHECK-LABEL: @inverted_min_red_float_ge(
; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_min_red_float_ge(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x oge acc) ? acc : x
  %cmp3 = fcmp fast oge float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min with an unordered compare; the final reduction is expected to use olt.
; CHECK-LABEL: @unordered_min_red_float(
; CHECK: fcmp fast ult <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @unordered_min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ult acc) ? x : acc
  %cmp3 = fcmp fast ult float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min with a non-strict unordered compare.
; CHECK-LABEL: @unordered_min_red_float_le(
; CHECK: fcmp fast ule <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @unordered_min_red_float_le(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ule acc) ? x : acc
  %cmp3 = fcmp fast ule float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Inverted min with an unordered compare.
; CHECK-LABEL: @inverted_unordered_min_red_float(
; CHECK: fcmp fast ugt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_unordered_min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x ugt acc) ? acc : x
  %cmp3 = fcmp fast ugt float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Inverted min with a non-strict unordered compare.
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
; CHECK: fcmp fast uge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select fast <2 x i1>

define float @inverted_unordered_min_red_float_ge(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; acc = (x uge acc) ? acc : x
  %cmp3 = fcmp fast uge float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Make sure we handle doubles, too.
; CHECK-LABEL: @min_red_double(
; CHECK: fcmp fast olt <2 x double>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x double>
; CHECK: select fast <2 x i1>

define double @min_red_double(double %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 4
  ; acc = (x olt acc) ? x : acc
  %cmp3 = fcmp fast olt double %0, %min.red.08
  %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret double %min.red.0
}


; Don't turn this into a max reduction: the no-nans-fp-math attribute is
; missing (the function below does not carry attribute group #0).
; CHECK-LABEL: @max_red_float_nans(
; CHECK-NOT: <2 x float>

define float @max_red_float_nans(float %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; Same compare/select shape as max_red_float.
  %cmp3 = fcmp fast ogt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}


; Attribute group applied to every floating-point test that is expected to
; be vectorized.
attributes #0 = { "no-nans-fp-math"="true" }
