; RUN: opt -mtriple=thumbv7em -arm-parallel-dsp -dce -S %s -o - | FileCheck %s

; Test that the ARM parallel DSP pass recognises a fully unrolled, 4-lane
; multiply-accumulate chain over sign-extended i16 elements: each pair of
; adjacent i16 loads should be widened into a single i32 load, and each pair
; of mul/add lanes folded into a call to @llvm.arm.smlad, which multiplies
; both halfword lanes and accumulates the two products.

; CHECK-LABEL: full_unroll
; CHECK: [[IV:%[^ ]+]] = phi i32
; CHECK: [[AI:%[^ ]+]] = getelementptr inbounds i32, i32* %a, i32 [[IV]]
; CHECK: [[BI:%[^ ]+]] = getelementptr inbounds i16*, i16** %b, i32 [[IV]]
; CHECK: [[BIJ:%[^ ]+]] = load i16*, i16** [[BI]], align 4
; CHECK: [[CI:%[^ ]+]] = getelementptr inbounds i16*, i16** %c, i32 [[IV]]
; CHECK: [[CIJ:%[^ ]+]] = load i16*, i16** [[CI]], align 4
; CHECK: [[BIJ_CAST:%[^ ]+]] = bitcast i16* [[BIJ]] to i32*
; CHECK: [[BIJ_LD:%[^ ]+]] = load i32, i32* [[BIJ_CAST]], align 2
; CHECK: [[CIJ_CAST:%[^ ]+]] = bitcast i16* [[CIJ]] to i32*
; CHECK: [[CIJ_LD:%[^ ]+]] = load i32, i32* [[CIJ_CAST]], align 2
; CHECK: [[SMLAD0:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[CIJ_LD]], i32 [[BIJ_LD]], i32 0)
; CHECK: [[BIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[BIJ]], i32 2
; CHECK: [[BIJ_2_CAST:%[^ ]+]] = bitcast i16* [[BIJ_2]] to i32*
; CHECK: [[BIJ_2_LD:%[^ ]+]] = load i32, i32* [[BIJ_2_CAST]], align 2
; CHECK: [[CIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[CIJ]], i32 2
; CHECK: [[CIJ_2_CAST:%[^ ]+]] = bitcast i16* [[CIJ_2]] to i32*
; CHECK: [[CIJ_2_LD:%[^ ]+]] = load i32, i32* [[CIJ_2_CAST]], align 2
; CHECK: [[SMLAD1:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[CIJ_2_LD]], i32 [[BIJ_2_LD]], i32 [[SMLAD0]])
; CHECK: store i32 [[SMLAD1]], i32* %arrayidx, align 4

define void @full_unroll(i32* noalias nocapture %a, i16** noalias nocapture readonly %b, i16** noalias nocapture readonly %c, i32 %N) {
entry:
  %cmp29 = icmp eq i32 %N, 0
  br i1 %cmp29, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.030
  %arrayidx5 = getelementptr inbounds i16*, i16** %b, i32 %i.030
  %0 = load i16*, i16** %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i16*, i16** %c, i32 %i.030
  %1 = load i16*, i16** %arrayidx7, align 4
  %2 = load i16, i16* %0, align 2
  %conv = sext i16 %2 to i32
  %3 = load i16, i16* %1, align 2
  %conv9 = sext i16 %3 to i32
  %mul = mul nsw i32 %conv9, %conv
  %arrayidx6.1 = getelementptr inbounds i16, i16* %0, i32 1
  %4 = load i16, i16* %arrayidx6.1, align 2
  %conv.1 = sext i16 %4 to i32
  %arrayidx8.1 = getelementptr inbounds i16, i16* %1, i32 1
  %5 = load i16, i16* %arrayidx8.1, align 2
  %conv9.1 = sext i16 %5 to i32
  %mul.1 = mul nsw i32 %conv9.1, %conv.1
  %add.1 = add nsw i32 %mul.1, %mul
  %arrayidx6.2 = getelementptr inbounds i16, i16* %0, i32 2
  %6 = load i16, i16* %arrayidx6.2, align 2
  %conv.2 = sext i16 %6 to i32
  %arrayidx8.2 = getelementptr inbounds i16, i16* %1, i32 2
  %7 = load i16, i16* %arrayidx8.2, align 2
  %conv9.2 = sext i16 %7 to i32
  %mul.2 = mul nsw i32 %conv9.2, %conv.2
  %add.2 = add nsw i32 %mul.2, %add.1
  %arrayidx6.3 = getelementptr inbounds i16, i16* %0, i32 3
  %8 = load i16, i16* %arrayidx6.3, align 2
  %conv.3 = sext i16 %8 to i32
  %arrayidx8.3 = getelementptr inbounds i16, i16* %1, i32 3
  %9 = load i16, i16* %arrayidx8.3, align 2
  %conv9.3 = sext i16 %9 to i32
  %mul.3 = mul nsw i32 %conv9.3, %conv.3
  %add.3 = add nsw i32 %mul.3, %add.2
  store i32 %add.3, i32* %arrayidx, align 4
  %inc12 = add nuw i32 %i.030, 1
  %exitcond = icmp eq i32 %inc12, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
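
; full_unroll_sub is the same kernel except that lane 0 performs a subtract
; rather than a multiply, so the (0,1) lane pair cannot be combined: lane 1
; should stay a scalar sext/mul/add chain and only lanes 2 and 3 should be
; merged into a single @llvm.arm.smlad call, seeded with that scalar
; accumulator.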
; CHECK-LABEL: full_unroll_sub
; CHECK: [[IV:%[^ ]+]] = phi i32
; CHECK: [[AI:%[^ ]+]] = getelementptr inbounds i32, i32* %a, i32 [[IV]]
; CHECK: [[BI:%[^ ]+]] = getelementptr inbounds i16*, i16** %b, i32 [[IV]]
; CHECK: [[BIJ:%[^ ]+]] = load i16*, i16** [[BI]], align 4
; CHECK: [[CI:%[^ ]+]] = getelementptr inbounds i16*, i16** %c, i32 [[IV]]
; CHECK: [[CIJ:%[^ ]+]] = load i16*, i16** [[CI]], align 4
; CHECK: [[BIJ_LD:%[^ ]+]] = load i16, i16* [[BIJ]], align 2
; CHECK: [[BIJ_LD_SXT:%[^ ]+]] = sext i16 [[BIJ_LD]] to i32
; CHECK: [[CIJ_LD:%[^ ]+]] = load i16, i16* [[CIJ]], align 2
; CHECK: [[CIJ_LD_SXT:%[^ ]+]] = sext i16 [[CIJ_LD]] to i32
; CHECK: [[SUB:%[^ ]+]] = sub nsw i32 [[CIJ_LD_SXT]], [[BIJ_LD_SXT]]
; CHECK: [[BIJ_1:%[^ ]+]] = getelementptr inbounds i16, i16* [[BIJ]], i32 1
; CHECK: [[BIJ_1_LD:%[^ ]+]] = load i16, i16* [[BIJ_1]], align 2
; CHECK: [[BIJ_1_LD_SXT:%[^ ]+]] = sext i16 [[BIJ_1_LD]] to i32
; CHECK: [[CIJ_1:%[^ ]+]] = getelementptr inbounds i16, i16* [[CIJ]], i32 1
; CHECK: [[CIJ_1_LD:%[^ ]+]] = load i16, i16* [[CIJ_1]], align 2
; CHECK: [[CIJ_1_LD_SXT:%[^ ]+]] = sext i16 [[CIJ_1_LD]] to i32
; CHECK: [[MUL:%[^ ]+]] = mul nsw i32 [[CIJ_1_LD_SXT]], [[BIJ_1_LD_SXT]]
; CHECK: [[ACC:%[^ ]+]] = add nsw i32 [[MUL]], [[SUB]]
; CHECK: [[BIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[BIJ]], i32 2
; CHECK: [[BIJ_2_CAST:%[^ ]+]] = bitcast i16* [[BIJ_2]] to i32*
; CHECK: [[BIJ_2_LD:%[^ ]+]] = load i32, i32* [[BIJ_2_CAST]], align 2
; CHECK: [[CIJ_2:%[^ ]+]] = getelementptr inbounds i16, i16* [[CIJ]], i32 2
; CHECK: [[CIJ_2_CAST:%[^ ]+]] = bitcast i16* [[CIJ_2]] to i32*
; CHECK: [[CIJ_2_LD:%[^ ]+]] = load i32, i32* [[CIJ_2_CAST]], align 2
; CHECK: [[SMLAD0:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[CIJ_2_LD]], i32 [[BIJ_2_LD]], i32 [[ACC]])
; CHECK: store i32 [[SMLAD0]], i32* %arrayidx, align 4

define void @full_unroll_sub(i32* noalias nocapture %a, i16** noalias nocapture readonly %b, i16** noalias nocapture readonly %c, i32 %N) {
entry:
  %cmp29 = icmp eq i32 %N, 0
  br i1 %cmp29, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.030
  %arrayidx5 = getelementptr inbounds i16*, i16** %b, i32 %i.030
  %0 = load i16*, i16** %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i16*, i16** %c, i32 %i.030
  %1 = load i16*, i16** %arrayidx7, align 4
  %2 = load i16, i16* %0, align 2
  %conv = sext i16 %2 to i32
  %3 = load i16, i16* %1, align 2
  %conv9 = sext i16 %3 to i32
  %sub = sub nsw i32 %conv9, %conv
  %arrayidx6.1 = getelementptr inbounds i16, i16* %0, i32 1
  %4 = load i16, i16* %arrayidx6.1, align 2
  %conv.1 = sext i16 %4 to i32
  %arrayidx8.1 = getelementptr inbounds i16, i16* %1, i32 1
  %5 = load i16, i16* %arrayidx8.1, align 2
  %conv9.1 = sext i16 %5 to i32
  %mul.1 = mul nsw i32 %conv9.1, %conv.1
  %add.1 = add nsw i32 %mul.1, %sub
  %arrayidx6.2 = getelementptr inbounds i16, i16* %0, i32 2
  %6 = load i16, i16* %arrayidx6.2, align 2
  %conv.2 = sext i16 %6 to i32
  %arrayidx8.2 = getelementptr inbounds i16, i16* %1, i32 2
  %7 = load i16, i16* %arrayidx8.2, align 2
  %conv9.2 = sext i16 %7 to i32
  %mul.2 = mul nsw i32 %conv9.2, %conv.2
  %add.2 = add nsw i32 %mul.2, %add.1
  %arrayidx6.3 = getelementptr inbounds i16, i16* %0, i32 3
  %8 = load i16, i16* %arrayidx6.3, align 2
  %conv.3 = sext i16 %8 to i32
  %arrayidx8.3 = getelementptr inbounds i16, i16* %1, i32 3
  %9 = load i16, i16* %arrayidx8.3, align 2
  %conv9.3 = sext i16 %9 to i32
  %mul.3 = mul nsw i32 %conv9.3, %conv.3
  %add.3 = add nsw i32 %mul.3, %add.2
  store i32 %add.3, i32* %arrayidx, align 4
  %inc12 = add nuw i32 %i.030, 1
  %exitcond = icmp eq i32 %inc12, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}