• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -loop-vectorize -S < %s | FileCheck %s
2
; Module-level data layout and target. The trailing "ni:1" component of the
; data layout marks address space 1 as non-integral; the stores in the tests
; below go through addrspace(1) pointers.
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
4target triple = "x86_64-unknown-linux-gnu"
5
6; PR34965/D39346
7
8; LV retains the original scalar loop intact as remainder loop. However,
9; after this transformation, analysis information concerning the remainder
10; loop may differ from the original scalar loop. This test is an example of
11; that behaviour, where values inside the remainder loop which SCEV could
12; originally analyze now require flow-sensitive analysis currently not
13; supported in SCEV. In particular, during LV code generation, after turning
14; the original scalar loop into the remainder loop, LV expected
15; Legal->isConsecutivePtr() to be consistent and return the same output as
16; during legal/cost model phases (original scalar loop). Unfortunately, that
17; condition was not satisfied because of the aforementioned SCEV limitation.
18; After D39346, LV code generation doesn't rely on Legal->isConsecutivePtr(),
19; i.e., SCEV. This test verifies that LV is able to handle the described cases.
20;
21; TODO: The SCEV limitation described before may affect plans to further
22; optimize the remainder loop of this particular test case. One tentative
23; solution is to detect the problematic IVs in LV (%7 and %8) and perform an
24; in-place IV optimization by replacing:
25;   %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
26; with
27;   %8 = sub i32 %7, 1.
28
29
30; Verify that the store is vectorized as a stride-1 memory access (see @test_02).
31
32; CHECK-LABEL: @test_01(
33; CHECK-NOT: vector.body:
34
35; This test was originally vectorized, but now SCEV is smart enough to prove
36; that its trip count is 1, so it gets ignored by vectorizer.
37; Function Attrs: uwtable
; @test_01: a two-level loop nest whose inner loop (block %6) stores through an
; addrspace(1) pointer indexed by a phi (%8) that always trails the induction
; variable %7 by one. The inner loop exits as soon as %13 > 61, and %7 enters
; at 111 or 63, so the exit condition already holds on the first iteration.
38define void @test_01() {
39  br label %.outer
40
41; <label>:1:                                      ; preds = %2
42  ret void
43
44; <label>:2:                                      ; preds = %._crit_edge.loopexit
  ; Outer-loop latch: step %.ph down by 2, then either return or re-enter
  ; %.outer. The branch condition is undef.
45  %3 = add nsw i32 %.ph, -2
46  br i1 undef, label %1, label %.outer
47
48.outer:                                           ; preds = %2, %0
  ; %.ph  is 336 on function entry and %3 when coming back from block %2.
  ; %.ph2 is 110 on function entry and 62 when coming back from block %2.
49  %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
50  %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
  ; %4 is the shift amount used by the store value; it is invariant in the
  ; inner loop. %5 is the starting value of the inner induction variable.
51  %4 = and i32 %.ph, 30
52  %5 = add i32 %.ph2, 1
53  br label %6
54
55; <label>:6:                                      ; preds = %6, %.outer
  ; %7 counts up by one per iteration starting at %.ph2 + 1; %8 takes the
  ; previous value of %7 (initially %.ph2), i.e. it is always %7 - 1.
56  %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
57  %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
  ; Store address: element (%8 + 2), zero-extended to i64, off an undef base.
58  %9 = add i32 %8, 2
59  %10 = zext i32 %9 to i64
60  %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
61  %12 = ashr i32 undef, %4
62  store i32 %12, i32 addrspace(1)* %11, align 4
  ; Advance the induction variable and leave the loop once it exceeds 61
  ; (signed compare).
63  %13 = add i32 %7, 1
64  %14 = icmp sgt i32 %13, 61
65  br i1 %14, label %._crit_edge.loopexit, label %6
66
67._crit_edge.loopexit:                             ; preds = %._crit_edge.loopexit, %6
  ; Exit block of the inner loop: loops on itself or continues to the outer
  ; latch (block %2); the condition is undef.
68  br i1 undef, label %2, label %._crit_edge.loopexit
69}
70
71; After the trip count is increased, the test gets vectorized.
72; CHECK-LABEL: @test_02(
73; CHECK: vector.body:
74; CHECK: store <4 x i32>
75
76; Function Attrs: uwtable
; @test_02: same loop nest as @test_01 except that the inner loop's exit bound
; is 610 instead of 61. With %7 entering at 111 or 63, the inner loop now runs
; 500 or 548 iterations before %13 > 610 holds.
77define void @test_02() {
78  br label %.outer
79
80; <label>:1:                                      ; preds = %2
81  ret void
82
83; <label>:2:                                      ; preds = %._crit_edge.loopexit
  ; Outer-loop latch: step %.ph down by 2, then either return or re-enter
  ; %.outer. The branch condition is undef.
84  %3 = add nsw i32 %.ph, -2
85  br i1 undef, label %1, label %.outer
86
87.outer:                                           ; preds = %2, %0
  ; %.ph  is 336 on function entry and %3 when coming back from block %2.
  ; %.ph2 is 110 on function entry and 62 when coming back from block %2.
88  %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
89  %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
  ; %4 is the shift amount used by the store value; it is invariant in the
  ; inner loop. %5 is the starting value of the inner induction variable.
90  %4 = and i32 %.ph, 30
91  %5 = add i32 %.ph2, 1
92  br label %6
93
94; <label>:6:                                      ; preds = %6, %.outer
  ; %7 counts up by one per iteration starting at %.ph2 + 1; %8 takes the
  ; previous value of %7 (initially %.ph2), i.e. it is always %7 - 1.
95  %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
96  %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
  ; Store address: element (%8 + 2), zero-extended to i64, off an undef base.
  ; Consecutive iterations therefore address consecutive i32 elements.
97  %9 = add i32 %8, 2
98  %10 = zext i32 %9 to i64
99  %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
100  %12 = ashr i32 undef, %4
101  store i32 %12, i32 addrspace(1)* %11, align 4
  ; Advance the induction variable and leave the loop once it exceeds 610
  ; (signed compare); this bound is the only difference from @test_01.
102  %13 = add i32 %7, 1
103  %14 = icmp sgt i32 %13, 610
104  br i1 %14, label %._crit_edge.loopexit, label %6
105
106._crit_edge.loopexit:                             ; preds = %._crit_edge.loopexit, %6
  ; Exit block of the inner loop: loops on itself or continues to the outer
  ; latch (block %2); the condition is undef.
107  br i1 undef, label %2, label %._crit_edge.loopexit
108}
109