; RUN: opt -loop-vectorize -S < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"

; PR34965/D39346

; LV retains the original scalar loop intact as the remainder loop. However,
; after this transformation, analysis information concerning the remainder
; loop may differ from the original scalar loop. This test is an example of
; that behaviour, where values inside the remainder loop which SCEV could
; originally analyze now require flow-sensitive analysis currently not
; supported in SCEV. In particular, during LV code generation, after turning
; the original scalar loop into the remainder loop, LV expected
; Legal->isConsecutivePtr() to be consistent and return the same output as
; during legal/cost model phases (original scalar loop). Unfortunately, that
; condition was not satisfied because of the aforementioned SCEV limitation.
; After D39346, LV code generation doesn't rely on Legal->isConsecutivePtr(),
; i.e., SCEV. This test verifies that LV is able to handle the described cases.
;
; TODO: The SCEV limitation described before may affect plans to further
; optimize the remainder loop of this particular test case. One tentative
; solution is to detect the problematic IVs in LV (%7 and %8) and perform an
; in-place IV optimization by replacing:
;   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
; with
;   %8 = sub i32 %7, 1.


; Verify that the store is vectorized as a stride-1 memory access.

; CHECK-LABEL: @test_01(
; CHECK-NOT: vector.body:

; This test was originally vectorized, but now SCEV is smart enough to prove
; that its trip count is 1, so it gets ignored by the vectorizer.
; Function Attrs: uwtable
; Reduced reproducer with an outer loop (%.outer .. %2) around a single-block
; inner loop (%6) that stores through an addrspace(1) pointer. The inner loop
; leaves once %13 > 61; since %.ph2 is 110 or 62, %13 is at least 64 on the
; first pass, so the inner body runs exactly once each time it is entered.
; NOTE: the file header refers to %7, %8, %6, %.ph2 and %.outer by name, so
; the value/block numbering here must stay as it is.
define void @test_01() {
  br label %.outer

; <label>:1:                                      ; preds = %2
  ret void

; <label>:2:                                      ; preds = %._crit_edge.loopexit
  ; Outer-loop latch: %.ph steps down by 2 on every trip back to %.outer.
  %3 = add nsw i32 %.ph, -2
  br i1 undef, label %1, label %.outer

.outer:                                           ; preds = %2, %0
  ; %.ph starts at 336; %.ph2 is 110 on entry from %0 and 62 on re-entry.
  %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
  %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
  ; Shift amount for the stored value, computed once per outer iteration.
  %4 = and i32 %.ph, 30
  ; Initial value of the inner induction variable %7 (one past %.ph2).
  %5 = add i32 %.ph2, 1
  br label %6

; <label>:6:                                      ; preds = %6, %.outer
  ; %7 counts up by 1 from %5. %8 receives %.ph2 on entry and the previous
  ; %7 afterwards, i.e. it trails %7 by one.
  %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
  %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
  ; Store index is zext(%8 + 2), used to address i32 elements.
  %9 = add i32 %8, 2
  %10 = zext i32 %9 to i64
  %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
  %12 = ashr i32 undef, %4
  store i32 %12, i32 addrspace(1)* %11, align 4
  %13 = add i32 %7, 1
  ; Signed exit test against 61: already true on the first iteration.
  %14 = icmp sgt i32 %13, 61
  br i1 %14, label %._crit_edge.loopexit, label %6

._crit_edge.loopexit:                             ; preds = %._crit_edge.loopexit, %6
  br i1 undef, label %2, label %._crit_edge.loopexit
}

; After trip count is increased, the test gets vectorized.
; CHECK-LABEL: @test_02(
; CHECK: vector.body:
; CHECK: store <4 x i32>

; Function Attrs: uwtable
; Same loop nest as @test_01 except that the inner-loop exit bound is 610
; instead of 61. With %.ph2 equal to 110 or 62, the inner loop (%6) now runs
; for several hundred iterations before %13 exceeds the bound.
; NOTE: the file header refers to %7, %8, %6, %.ph2 and %.outer by name, so
; the value/block numbering here must stay as it is.
define void @test_02() {
  br label %.outer

; <label>:1:                                      ; preds = %2
  ret void

; <label>:2:                                      ; preds = %._crit_edge.loopexit
  ; Outer-loop latch: %.ph steps down by 2 on every trip back to %.outer.
  %3 = add nsw i32 %.ph, -2
  br i1 undef, label %1, label %.outer

.outer:                                           ; preds = %2, %0
  ; %.ph starts at 336; %.ph2 is 110 on entry from %0 and 62 on re-entry.
  %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
  %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
  ; Shift amount for the stored value, computed once per outer iteration.
  %4 = and i32 %.ph, 30
  ; Initial value of the inner induction variable %7 (one past %.ph2).
  %5 = add i32 %.ph2, 1
  br label %6

; <label>:6:                                      ; preds = %6, %.outer
  ; %7 counts up by 1 from %5. %8 receives %.ph2 on entry and the previous
  ; %7 afterwards, i.e. it trails %7 by one.
  %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
  %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
  ; Store index is zext(%8 + 2), used to address i32 elements; it advances
  ; by one element per iteration.
  %9 = add i32 %8, 2
  %10 = zext i32 %9 to i64
  %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
  %12 = ashr i32 undef, %4
  store i32 %12, i32 addrspace(1)* %11, align 4
  %13 = add i32 %7, 1
  ; Signed exit test against 610 (the only difference from @test_01).
  %14 = icmp sgt i32 %13, 610
  br i1 %14, label %._crit_edge.loopexit, label %6

._crit_edge.loopexit:                             ; preds = %._crit_edge.loopexit, %6
  br i1 undef, label %2, label %._crit_edge.loopexit
}