; RUN: opt -mtriple=thumbv7em -arm-parallel-dsp -dce -S %s -o - | FileCheck %s
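
; The -arm-parallel-dsp pass should fuse each adjacent pair of sign-extending
; i16 multiplies below into a call to @llvm.arm.smlad, which computes
; acc + (lo16(x) * lo16(y)) + (hi16(x) * hi16(y)) in one SMLAD instruction.
;
; A plausible C source for @full_unroll (an illustrative assumption, not part
; of the original test) is a dot product over rows of b and c, unrolled by 4:
;
;   void full_unroll(int *a, short **b, short **c, unsigned N) {
;     for (unsigned i = 0; i < N; i++)
;       a[i] = b[i][0] * c[i][0] + b[i][1] * c[i][1] +
;              b[i][2] * c[i][2] + b[i][3] * c[i][3];
;   }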
; CHECK-LABEL: full_unroll
; CHECK: [[IV:%[^ ]+]] = phi i32
; CHECK: [[AI:%[^ ]+]] = getelementptr inbounds i32, i32* %a, i32 [[IV]]
; CHECK: [[BI:%[^ ]+]] = getelementptr inbounds i16*, i16** %b, i32 [[IV]]
; CHECK: [[BIJ:%[^ ]+]] = load i16*, i16** [[BI]], align 4
; CHECK: [[CI:%[^ ]+]] = getelementptr inbounds i16*, i16** %c, i32 [[IV]]
; CHECK: [[CIJ:%[^ ]+]] = load i16*, i16** [[CI]], align 4
; CHECK: [[BIJ_CAST:%[^ ]+]] = bitcast i16* [[BIJ]] to i32*
; CHECK: [[BIJ_LD:%[^ ]+]] = load i32, i32* [[BIJ_CAST]], align 2
; CHECK: [[CIJ_CAST:%[^ ]+]] = bitcast i16* [[CIJ]] to i32*
; CHECK: [[CIJ_LD:%[^ ]+]] = load i32, i32* [[CIJ_CAST]], align 2
; CHECK: [[SMLAD0:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[CIJ_LD]], i32 [[BIJ_LD]], i32 0)
; CHECK: [[BIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[BIJ]], i32 2
; CHECK: [[BIJ_2_CAST:%[^ ]+]] = bitcast i16* [[BIJ_2]] to i32*
; CHECK: [[BIJ_2_LD:%[^ ]+]] = load i32, i32* [[BIJ_2_CAST]], align 2
; CHECK: [[CIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[CIJ]], i32 2
; CHECK: [[CIJ_2_CAST:%[^ ]+]] = bitcast i16* [[CIJ_2]] to i32*
; CHECK: [[CIJ_2_LD:%[^ ]+]] = load i32, i32* [[CIJ_2_CAST]], align 2
; CHECK: [[SMLAD1:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[CIJ_2_LD]], i32 [[BIJ_2_LD]], i32 [[SMLAD0]])
; CHECK: store i32 [[SMLAD1]], i32* %arrayidx, align 4

define void @full_unroll(i32* noalias nocapture %a, i16** noalias nocapture readonly %b, i16** noalias nocapture readonly %c, i32 %N) {
entry:
  %cmp29 = icmp eq i32 %N, 0
  br i1 %cmp29, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.030
  %arrayidx5 = getelementptr inbounds i16*, i16** %b, i32 %i.030
  %0 = load i16*, i16** %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i16*, i16** %c, i32 %i.030
  %1 = load i16*, i16** %arrayidx7, align 4
  %2 = load i16, i16* %0, align 2
  %conv = sext i16 %2 to i32
  %3 = load i16, i16* %1, align 2
  %conv9 = sext i16 %3 to i32
  %mul = mul nsw i32 %conv9, %conv
  %arrayidx6.1 = getelementptr inbounds i16, i16* %0, i32 1
  %4 = load i16, i16* %arrayidx6.1, align 2
  %conv.1 = sext i16 %4 to i32
  %arrayidx8.1 = getelementptr inbounds i16, i16* %1, i32 1
  %5 = load i16, i16* %arrayidx8.1, align 2
  %conv9.1 = sext i16 %5 to i32
  %mul.1 = mul nsw i32 %conv9.1, %conv.1
  %add.1 = add nsw i32 %mul.1, %mul
  %arrayidx6.2 = getelementptr inbounds i16, i16* %0, i32 2
  %6 = load i16, i16* %arrayidx6.2, align 2
  %conv.2 = sext i16 %6 to i32
  %arrayidx8.2 = getelementptr inbounds i16, i16* %1, i32 2
  %7 = load i16, i16* %arrayidx8.2, align 2
  %conv9.2 = sext i16 %7 to i32
  %mul.2 = mul nsw i32 %conv9.2, %conv.2
  %add.2 = add nsw i32 %mul.2, %add.1
  %arrayidx6.3 = getelementptr inbounds i16, i16* %0, i32 3
  %8 = load i16, i16* %arrayidx6.3, align 2
  %conv.3 = sext i16 %8 to i32
  %arrayidx8.3 = getelementptr inbounds i16, i16* %1, i32 3
  %9 = load i16, i16* %arrayidx8.3, align 2
  %conv9.3 = sext i16 %9 to i32
  %mul.3 = mul nsw i32 %conv9.3, %conv.3
  %add.3 = add nsw i32 %mul.3, %add.2
  store i32 %add.3, i32* %arrayidx, align 4
  %inc12 = add nuw i32 %i.030, 1
  %exitcond = icmp eq i32 %inc12, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
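
; In @full_unroll_sub the leading sub breaks the multiply-accumulate chain at
; lane 0: the CHECK lines expect lanes 0 and 1 to stay scalar, with only lanes
; 2 and 3 fused into one @llvm.arm.smlad that takes the scalar prefix as its
; accumulator. A plausible C source (again an illustrative assumption) is:
;
;   void full_unroll_sub(int *a, short **b, short **c, unsigned N) {
;     for (unsigned i = 0; i < N; i++)
;       a[i] = (c[i][0] - b[i][0]) + b[i][1] * c[i][1] +
;              b[i][2] * c[i][2] + b[i][3] * c[i][3];
;   }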
; CHECK-LABEL: full_unroll_sub
; CHECK: [[IV:%[^ ]+]] = phi i32
; CHECK: [[AI:%[^ ]+]] = getelementptr inbounds i32, i32* %a, i32 [[IV]]
; CHECK: [[BI:%[^ ]+]] = getelementptr inbounds i16*, i16** %b, i32 [[IV]]
; CHECK: [[BIJ:%[^ ]+]] = load i16*, i16** [[BI]], align 4
; CHECK: [[CI:%[^ ]+]] = getelementptr inbounds i16*, i16** %c, i32 [[IV]]
; CHECK: [[CIJ:%[^ ]+]] = load i16*, i16** [[CI]], align 4
; CHECK: [[BIJ_LD:%[^ ]+]] = load i16, i16* [[BIJ]], align 2
; CHECK: [[BIJ_LD_SXT:%[^ ]+]] = sext i16 [[BIJ_LD]] to i32
; CHECK: [[CIJ_LD:%[^ ]+]] = load i16, i16* [[CIJ]], align 2
; CHECK: [[CIJ_LD_SXT:%[^ ]+]] = sext i16 [[CIJ_LD]] to i32
; CHECK: [[SUB:%[^ ]+]] = sub nsw i32 [[CIJ_LD_SXT]], [[BIJ_LD_SXT]]
; CHECK: [[BIJ_1:%[^ ]+]] = getelementptr inbounds i16, i16* [[BIJ]], i32 1
; CHECK: [[BIJ_1_LD:%[^ ]+]] = load i16, i16* [[BIJ_1]], align 2
; CHECK: [[BIJ_1_LD_SXT:%[^ ]+]] = sext i16 [[BIJ_1_LD]] to i32
; CHECK: [[CIJ_1:%[^ ]+]] = getelementptr inbounds i16, i16* [[CIJ]], i32 1
; CHECK: [[CIJ_1_LD:%[^ ]+]] = load i16, i16* [[CIJ_1]], align 2
; CHECK: [[CIJ_1_LD_SXT:%[^ ]+]] = sext i16 [[CIJ_1_LD]] to i32
; CHECK: [[MUL:%[^ ]+]] = mul nsw i32 [[CIJ_1_LD_SXT]], [[BIJ_1_LD_SXT]]
; CHECK: [[ACC:%[^ ]+]] = add nsw i32 [[MUL]], [[SUB]]
; CHECK: [[BIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[BIJ]], i32 2
; CHECK: [[BIJ_2_CAST:%[^ ]+]] = bitcast i16* [[BIJ_2]] to i32*
; CHECK: [[BIJ_2_LD:%[^ ]+]] = load i32, i32* [[BIJ_2_CAST]], align 2
; CHECK: [[CIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[CIJ]], i32 2
; CHECK: [[CIJ_2_CAST:%[^ ]+]] = bitcast i16* [[CIJ_2]] to i32*
; CHECK: [[CIJ_2_LD:%[^ ]+]] = load i32, i32* [[CIJ_2_CAST]], align 2
; CHECK: [[SMLAD0:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[CIJ_2_LD]], i32 [[BIJ_2_LD]], i32 [[ACC]])
; CHECK: store i32 [[SMLAD0]], i32* %arrayidx, align 4

define void @full_unroll_sub(i32* noalias nocapture %a, i16** noalias nocapture readonly %b, i16** noalias nocapture readonly %c, i32 %N) {
entry:
  %cmp29 = icmp eq i32 %N, 0
  br i1 %cmp29, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.030
  %arrayidx5 = getelementptr inbounds i16*, i16** %b, i32 %i.030
  %0 = load i16*, i16** %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i16*, i16** %c, i32 %i.030
  %1 = load i16*, i16** %arrayidx7, align 4
  %2 = load i16, i16* %0, align 2
  %conv = sext i16 %2 to i32
  %3 = load i16, i16* %1, align 2
  %conv9 = sext i16 %3 to i32
  %sub = sub nsw i32 %conv9, %conv
  %arrayidx6.1 = getelementptr inbounds i16, i16* %0, i32 1
  %4 = load i16, i16* %arrayidx6.1, align 2
  %conv.1 = sext i16 %4 to i32
  %arrayidx8.1 = getelementptr inbounds i16, i16* %1, i32 1
  %5 = load i16, i16* %arrayidx8.1, align 2
  %conv9.1 = sext i16 %5 to i32
  %mul.1 = mul nsw i32 %conv9.1, %conv.1
  %add.1 = add nsw i32 %mul.1, %sub
  %arrayidx6.2 = getelementptr inbounds i16, i16* %0, i32 2
  %6 = load i16, i16* %arrayidx6.2, align 2
  %conv.2 = sext i16 %6 to i32
  %arrayidx8.2 = getelementptr inbounds i16, i16* %1, i32 2
  %7 = load i16, i16* %arrayidx8.2, align 2
  %conv9.2 = sext i16 %7 to i32
  %mul.2 = mul nsw i32 %conv9.2, %conv.2
  %add.2 = add nsw i32 %mul.2, %add.1
  %arrayidx6.3 = getelementptr inbounds i16, i16* %0, i32 3
  %8 = load i16, i16* %arrayidx6.3, align 2
  %conv.3 = sext i16 %8 to i32
  %arrayidx8.3 = getelementptr inbounds i16, i16* %1, i32 3
  %9 = load i16, i16* %arrayidx8.3, align 2
  %conv9.3 = sext i16 %9 to i32
  %mul.3 = mul nsw i32 %conv9.3, %conv.3
  %add.3 = add nsw i32 %mul.3, %add.2
  store i32 %add.3, i32* %arrayidx, align 4
  %inc12 = add nuw i32 %i.030, 1
  %exitcond = icmp eq i32 %inc12, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}