; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s

; Cost-model expectations for llvm.vector.reduce.add on the MVE target named in
; the RUN line. Each function below covers one result element width, with
; vectors of 1, 2, 4, 8 and 16 lanes. The expected costs are generated by the
; script named in the NOTE line; regenerate them rather than editing by hand.

; Plain i8 add reductions.
define void @add_i8() {
; CHECK-LABEL: 'add_i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)

  %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)

  %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)

  %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)

  %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)

  ret void
}

; i16 add reductions: inputs zero-/sign-extended from i8 first (%a0..%a4),
; then plain i16 inputs (%a5..%a9).
define void @add_i16() {
; CHECK-LABEL: 'add_i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i16>
  %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)

  %a0sa = sext <1 x i8> undef to <1 x i16>
  %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)

  %a1za = zext <2 x i8> undef to <2 x i16>
  %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)

  %a1sa = sext <2 x i8> undef to <2 x i16>
  %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)

  %a2za = zext <4 x i8> undef to <4 x i16>
  %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)

  %a2sa = sext <4 x i8> undef to <4 x i16>
  %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)

  %a3za = zext <8 x i8> undef to <8 x i16>
  %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)

  %a3sa = sext <8 x i8> undef to <8 x i16>
  %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)

  %a4za = zext <16 x i8> undef to <16 x i16>
  %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)

  %a4sa = sext <16 x i8> undef to <16 x i16>
  %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)

  %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)

  %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)

  %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)

  %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)

  %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)

  ret void
}

; i32 add reductions: inputs extended from i8 (%a0..%a4), extended from i16
; (%a5..%a9), then plain i32 inputs (%a10..%a14).
define void @add_i32() {
; CHECK-LABEL: 'add_i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i32>
  %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)

  %a0sa = sext <1 x i8> undef to <1 x i32>
  %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)

  %a1za = zext <2 x i8> undef to <2 x i32>
  %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)

  %a1sa = sext <2 x i8> undef to <2 x i32>
  %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)

  %a2za = zext <4 x i8> undef to <4 x i32>
  %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)

  %a2sa = sext <4 x i8> undef to <4 x i32>
  %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)

  %a3za = zext <8 x i8> undef to <8 x i32>
  %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)

  %a3sa = sext <8 x i8> undef to <8 x i32>
  %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)

  %a4za = zext <16 x i8> undef to <16 x i32>
  %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)

  %a4sa = sext <16 x i8> undef to <16 x i32>
  %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)

  %a5za = zext <1 x i16> undef to <1 x i32>
  %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)

  %a5sa = sext <1 x i16> undef to <1 x i32>
  %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)

  %a6za = zext <2 x i16> undef to <2 x i32>
  %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)

  %a6sa = sext <2 x i16> undef to <2 x i32>
  %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)

  %a7za = zext <4 x i16> undef to <4 x i32>
  %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)

  %a7sa = sext <4 x i16> undef to <4 x i32>
  %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)

  %a8za = zext <8 x i16> undef to <8 x i32>
  %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)

  %a8sa = sext <8 x i16> undef to <8 x i32>
  %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)

  %a9za = zext <16 x i16> undef to <16 x i32>
  %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)

  %a9sa = sext <16 x i16> undef to <16 x i32>
  %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)

  %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)

  %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)

  %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)

  %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)

  %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)

  ret void
}

; i64 add reductions: inputs extended from i8 (%a0..%a4), from i16 (%a5..%a9),
; from i32 (%a10..%a14), then plain i64 inputs (%a15..%a19).
; NOTE(review): the expectation for %a5sa (sext <1 x i16> to <1 x i64>) is 8,
; while the single-lane sext from i8 (%a0sa) and from i32 (%a10sa) are both 6.
; This may be what the cost model really reports; confirm by regenerating.
define void @add_i64() {
; CHECK-LABEL: 'add_i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i64>
  %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)

  %a0sa = sext <1 x i8> undef to <1 x i64>
  %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)

  %a1za = zext <2 x i8> undef to <2 x i64>
  %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)

  %a1sa = sext <2 x i8> undef to <2 x i64>
  %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)

  %a2za = zext <4 x i8> undef to <4 x i64>
  %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)

  %a2sa = sext <4 x i8> undef to <4 x i64>
  %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)

  %a3za = zext <8 x i8> undef to <8 x i64>
  %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)

  %a3sa = sext <8 x i8> undef to <8 x i64>
  %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)

  %a4za = zext <16 x i8> undef to <16 x i64>
  %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)

  %a4sa = sext <16 x i8> undef to <16 x i64>
  %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)

  %a5za = zext <1 x i16> undef to <1 x i64>
  %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)

  %a5sa = sext <1 x i16> undef to <1 x i64>
  %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)

  %a6za = zext <2 x i16> undef to <2 x i64>
  %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)

  %a6sa = sext <2 x i16> undef to <2 x i64>
  %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)

  %a7za = zext <4 x i16> undef to <4 x i64>
  %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)

  %a7sa = sext <4 x i16> undef to <4 x i64>
  %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)

  %a8za = zext <8 x i16> undef to <8 x i64>
  %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)

  %a8sa = sext <8 x i16> undef to <8 x i64>
  %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)

  %a9za = zext <16 x i16> undef to <16 x i64>
  %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)

  %a9sa = sext <16 x i16> undef to <16 x i64>
  %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)

  %a10za = zext <1 x i32> undef to <1 x i64>
  %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)

  %a10sa = sext <1 x i32> undef to <1 x i64>
  %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)

  %a11za = zext <2 x i32> undef to <2 x i64>
  %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)

  %a11sa = sext <2 x i32> undef to <2 x i64>
  %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)

  %a12za = zext <4 x i32> undef to <4 x i64>
  %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)

  %a12sa = sext <4 x i32> undef to <4 x i64>
  %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)

  %a13za = zext <8 x i32> undef to <8 x i64>
  %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)

  %a13sa = sext <8 x i32> undef to <8 x i64>
  %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)

  %a14za = zext <16 x i32> undef to <16 x i64>
  %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)

  %a14sa = sext <16 x i32> undef to <16 x i64>
  %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)

  %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)

  %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)

  %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)

  %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)

  %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)

  ret void
}

; i8 multiply feeding an add reduction.
define void @mla_i8() {
; CHECK-LABEL: 'mla_i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a1m = mul <2 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0m = mul <1 x i8> undef, undef
  %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)

  %a1m = mul <2 x i8> undef, undef
  %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)

  %a2m = mul <4 x i8> undef, undef
  %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)

  %a3m = mul <8 x i8> undef, undef
  %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)

  %a4m = mul <16 x i8> undef, undef
  %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)

  ret void
}

; i16 multiply feeding an add reduction: operands extended from i8 first
; (%a0..%a4), then plain i16 operands (%a5..%a9).
define void @mla_i16() {
; CHECK-LABEL: 'mla_i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a6m = mul <2 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i16>
  %a0zb = zext <1 x i8> undef to <1 x i16>
  %a0zm = mul <1 x i16> %a0za, %a0zb
  %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)

  %a0sa = sext <1 x i8> undef to <1 x i16>
  %a0sb = sext <1 x i8> undef to <1 x i16>
  %a0sm = mul <1 x i16> %a0sa, %a0sb
  %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)

  %a1za = zext <2 x i8> undef to
<2 x i16> 489 %a1zb = zext <2 x i8> undef to <2 x i16> 490 %a1zm = mul <2 x i16> %a1za, %a1zb 491 %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm) 492 493 %a1sa = sext <2 x i8> undef to <2 x i16> 494 %a1sb = sext <2 x i8> undef to <2 x i16> 495 %a1sm = mul <2 x i16> %a1sa, %a1sb 496 %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm) 497 498 %a2za = zext <4 x i8> undef to <4 x i16> 499 %a2zb = zext <4 x i8> undef to <4 x i16> 500 %a2zm = mul <4 x i16> %a2za, %a2zb 501 %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm) 502 503 %a2sa = sext <4 x i8> undef to <4 x i16> 504 %a2sb = sext <4 x i8> undef to <4 x i16> 505 %a2sm = mul <4 x i16> %a2sa, %a2sb 506 %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm) 507 508 %a3za = zext <8 x i8> undef to <8 x i16> 509 %a3zb = zext <8 x i8> undef to <8 x i16> 510 %a3zm = mul <8 x i16> %a3za, %a3zb 511 %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm) 512 513 %a3sa = sext <8 x i8> undef to <8 x i16> 514 %a3sb = sext <8 x i8> undef to <8 x i16> 515 %a3sm = mul <8 x i16> %a3sa, %a3sb 516 %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm) 517 518 %a4za = zext <16 x i8> undef to <16 x i16> 519 %a4zb = zext <16 x i8> undef to <16 x i16> 520 %a4zm = mul <16 x i16> %a4za, %a4zb 521 %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm) 522 523 %a4sa = sext <16 x i8> undef to <16 x i16> 524 %a4sb = sext <16 x i8> undef to <16 x i16> 525 %a4sm = mul <16 x i16> %a4sa, %a4sb 526 %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm) 527 528 %a5m = mul <1 x i16> undef, undef 529 %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m) 530 531 %a6m = mul <2 x i16> undef, undef 532 %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m) 533 534 %a7m = mul <4 x i16> undef, undef 535 %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m) 536 537 %a8m = mul <8 x i16> undef, undef 538 %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> 
%a8m) 539 540 %a9m = mul <16 x i16> undef, undef 541 %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m) 542 543 ret void 544} 545
; mla_i32: cost-model coverage for multiply-then-add-reduce patterns whose
; reduction result is i32. For each vector width (1, 2, 4, 8 and 16 lanes) the
; function builds a mul that feeds @llvm.vector.reduce.add, and the assertion
; lines record the estimated cost reported for every instruction.
;   %a0..%a4   - mul operands are zext/sext from i8 lanes
;   %a5..%a9   - mul operands are zext/sext from i16 lanes
;   %a10..%a14 - plain i32 mul of undef operands, no extension
; Value-name suffixes: z = zext variant, s = sext variant; a and b are the two
; extended operands, m is their product, and the name without a/b/m (for
; example %a0z or %a10) is the reduction call itself.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_analyze_test_checks.py instead of editing
; the expected costs by hand.
546define void @mla_i32() { 547; CHECK-LABEL: 'mla_i32' 548; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32> 549; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32> 550; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb 551; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm) 552; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32> 553; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32> 554; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb 555; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm) 556; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32> 557; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32> 558; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb 559; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm) 560; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32> 561; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i32> 562; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb 563; CHECK-NEXT: 
Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm) 564; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32> 565; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32> 566; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb 567; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm) 568; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32> 569; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32> 570; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb 571; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm) 572; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32> 573; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32> 574; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb 575; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm) 576; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32> 577; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32> 578; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb 579; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 
@llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm) 580; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32> 581; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32> 582; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb 583; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm) 584; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32> 585; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32> 586; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb 587; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm) 588; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32> 589; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32> 590; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb 591; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm) 592; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32> 593; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32> 594; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sm = mul <1 x i32> %a5sa, %a5sb 595; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm) 596; 
CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32> 597; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32> 598; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb 599; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm) 600; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32> 601; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32> 602; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb 603; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm) 604; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32> 605; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32> 606; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb 607; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm) 608; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32> 609; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32> 610; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb 611; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm) 612; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %a8za = zext <8 x i16> undef to <8 x i32> 613; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32> 614; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb 615; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm) 616; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32> 617; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32> 618; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb 619; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm) 620; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32> 621; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32> 622; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb 623; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm) 624; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32> 625; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32> 626; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb 627; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) 628; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef 
629; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) 630; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a11m = mul <2 x i32> undef, undef 631; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) 632; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef 633; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) 634; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef 635; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m) 636; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef 637; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m) 638; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 639; 640 %a0za = zext <1 x i8> undef to <1 x i32> 641 %a0zb = zext <1 x i8> undef to <1 x i32> 642 %a0zm = mul <1 x i32> %a0za, %a0zb 643 %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm) 644 645 %a0sa = sext <1 x i8> undef to <1 x i32> 646 %a0sb = sext <1 x i8> undef to <1 x i32> 647 %a0sm = mul <1 x i32> %a0sa, %a0sb 648 %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm) 649 650 %a1za = zext <2 x i8> undef to <2 x i32> 651 %a1zb = zext <2 x i8> undef to <2 x i32> 652 %a1zm = mul <2 x i32> %a1za, %a1zb 653 %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm) 654 655 %a1sa = sext <2 x i8> undef to <2 x i32> 656 %a1sb = sext <2 x i8> undef to <2 x i32> 657 %a1sm = mul <2 x i32> %a1sa, %a1sb 658 %a1s = call i32 
@llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm) 659 660 %a2za = zext <4 x i8> undef to <4 x i32> 661 %a2zb = zext <4 x i8> undef to <4 x i32> 662 %a2zm = mul <4 x i32> %a2za, %a2zb 663 %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm) 664 665 %a2sa = sext <4 x i8> undef to <4 x i32> 666 %a2sb = sext <4 x i8> undef to <4 x i32> 667 %a2sm = mul <4 x i32> %a2sa, %a2sb 668 %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm) 669 670 %a3za = zext <8 x i8> undef to <8 x i32> 671 %a3zb = zext <8 x i8> undef to <8 x i32> 672 %a3zm = mul <8 x i32> %a3za, %a3zb 673 %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm) 674 675 %a3sa = sext <8 x i8> undef to <8 x i32> 676 %a3sb = sext <8 x i8> undef to <8 x i32> 677 %a3sm = mul <8 x i32> %a3sa, %a3sb 678 %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm) 679 680 %a4za = zext <16 x i8> undef to <16 x i32> 681 %a4zb = zext <16 x i8> undef to <16 x i32> 682 %a4zm = mul <16 x i32> %a4za, %a4zb 683 %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm) 684 685 %a4sa = sext <16 x i8> undef to <16 x i32> 686 %a4sb = sext <16 x i8> undef to <16 x i32> 687 %a4sm = mul <16 x i32> %a4sa, %a4sb 688 %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm) 689 690 %a5za = zext <1 x i16> undef to <1 x i32> 691 %a5zb = zext <1 x i16> undef to <1 x i32> 692 %a5zm = mul <1 x i32> %a5za, %a5zb 693 %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm) 694 695 %a5sa = sext <1 x i16> undef to <1 x i32> 696 %a5sb = sext <1 x i16> undef to <1 x i32> 697 %a5sm = mul <1 x i32> %a5sa, %a5sb 698 %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm) 699 700 %a6za = zext <2 x i16> undef to <2 x i32> 701 %a6zb = zext <2 x i16> undef to <2 x i32> 702 %a6zm = mul <2 x i32> %a6za, %a6zb 703 %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm) 704 705 %a6sa = sext <2 x i16> undef to <2 x i32> 706 %a6sb = sext <2 x i16> undef to <2 x i32> 707 %a6sm = mul <2 x i32> %a6sa, 
%a6sb 708 %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm) 709 710 %a7za = zext <4 x i16> undef to <4 x i32> 711 %a7zb = zext <4 x i16> undef to <4 x i32> 712 %a7zm = mul <4 x i32> %a7za, %a7zb 713 %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm) 714 715 %a7sa = sext <4 x i16> undef to <4 x i32> 716 %a7sb = sext <4 x i16> undef to <4 x i32> 717 %a7sm = mul <4 x i32> %a7sa, %a7sb 718 %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm) 719 720 %a8za = zext <8 x i16> undef to <8 x i32> 721 %a8zb = zext <8 x i16> undef to <8 x i32> 722 %a8zm = mul <8 x i32> %a8za, %a8zb 723 %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm) 724 725 %a8sa = sext <8 x i16> undef to <8 x i32> 726 %a8sb = sext <8 x i16> undef to <8 x i32> 727 %a8sm = mul <8 x i32> %a8sa, %a8sb 728 %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm) 729 730 %a9za = zext <16 x i16> undef to <16 x i32> 731 %a9zb = zext <16 x i16> undef to <16 x i32> 732 %a9zm = mul <16 x i32> %a9za, %a9zb 733 %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm) 734 735 %a9sa = sext <16 x i16> undef to <16 x i32> 736 %a9sb = sext <16 x i16> undef to <16 x i32> 737 %a9sm = mul <16 x i32> %a9sa, %a9sb 738 %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) 739 740 %a10m = mul <1 x i32> undef, undef 741 %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) 742 743 %a11m = mul <2 x i32> undef, undef 744 %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) 745 746 %a12m = mul <4 x i32> undef, undef 747 %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) 748 749 %a13m = mul <8 x i32> undef, undef 750 %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m) 751 752 %a14m = mul <16 x i32> undef, undef 753 %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m) 754 755 ret void 756} 757 758define void @mla_i64() { 759; CHECK-LABEL: 'mla_i64' 760; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %a0za = zext <1 x i8> undef to <1 x i64> 761; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64> 762; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb 763; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm) 764; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64> 765; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64> 766; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb 767; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm) 768; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64> 769; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64> 770; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb 771; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm) 772; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64> 773; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64> 774; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb 775; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm) 776; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64> 777; CHECK-NEXT: 
Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64> 778; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb 779; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm) 780; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64> 781; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64> 782; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb 783; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm) 784; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64> 785; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64> 786; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb 787; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm) 788; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64> 789; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64> 790; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb 791; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm) 792; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64> 793; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for 
instruction: %a4zb = zext <16 x i8> undef to <16 x i64> 794; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb 795; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm) 796; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64> 797; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64> 798; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb 799; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm) 800; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a5za = zext <1 x i16> undef to <1 x i64> 801; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64> 802; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb 803; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm) 804; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64> 805; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64> 806; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb 807; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm) 808; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64> 809; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 
x i64> 810; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb 811; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm) 812; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64> 813; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64> 814; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb 815; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm) 816; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64> 817; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64> 818; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb 819; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm) 820; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64> 821; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64> 822; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb 823; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm) 824; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64> 825; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64> 826; CHECK-NEXT: Cost Model: Found an estimated 
cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb 827; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm) 828; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64> 829; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64> 830; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb 831; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm) 832; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64> 833; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64> 834; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb 835; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm) 836; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64> 837; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64> 838; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb 839; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm) 840; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10za = zext <1 x i32> undef to <1 x i64> 841; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64> 842; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %a10zm = mul <1 x i64> %a10za, %a10zb 843; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm) 844; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64> 845; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64> 846; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb 847; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm) 848; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64> 849; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64> 850; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb 851; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm) 852; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64> 853; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64> 854; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb 855; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm) 856; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64> 857; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12zb = zext <4 x i32> undef to <4 x i64> 858; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a12zm = mul <4 x 
i64> %a12za, %a12zb 859; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm) 860; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64> 861; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64> 862; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb 863; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm) 864; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64> 865; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64> 866; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb 867; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm) 868; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64> 869; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64> 870; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb 871; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm) 872; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64> 873; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14zb = zext <16 x i32> undef to <16 x i64> 874; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a14zm = mul <16 x i64> %a14za, 
%a14zb 875; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm) 876; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64> 877; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64> 878; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb 879; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm) 880; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef 881; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m) 882; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a16m = mul <2 x i64> undef, undef 883; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m) 884; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %a17m = mul <4 x i64> undef, undef 885; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m) 886; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef 887; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m) 888; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a19m = mul <16 x i64> undef, undef 889; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m) 890; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 891; 
892 %a0za = zext <1 x i8> undef to <1 x i64> 893 %a0zb = zext <1 x i8> undef to <1 x i64> 894 %a0zm = mul <1 x i64> %a0za, %a0zb 895 %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm) 896 897 %a0sa = sext <1 x i8> undef to <1 x i64> 898 %a0sb = sext <1 x i8> undef to <1 x i64> 899 %a0sm = mul <1 x i64> %a0sa, %a0sb 900 %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm) 901 902 %a1za = zext <2 x i8> undef to <2 x i64> 903 %a1zb = zext <2 x i8> undef to <2 x i64> 904 %a1zm = mul <2 x i64> %a1za, %a1zb 905 %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm) 906 907 %a1sa = sext <2 x i8> undef to <2 x i64> 908 %a1sb = sext <2 x i8> undef to <2 x i64> 909 %a1sm = mul <2 x i64> %a1sa, %a1sb 910 %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm) 911 912 %a2za = zext <4 x i8> undef to <4 x i64> 913 %a2zb = zext <4 x i8> undef to <4 x i64> 914 %a2zm = mul <4 x i64> %a2za, %a2zb 915 %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm) 916 917 %a2sa = sext <4 x i8> undef to <4 x i64> 918 %a2sb = sext <4 x i8> undef to <4 x i64> 919 %a2sm = mul <4 x i64> %a2sa, %a2sb 920 %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm) 921 922 %a3za = zext <8 x i8> undef to <8 x i64> 923 %a3zb = zext <8 x i8> undef to <8 x i64> 924 %a3zm = mul <8 x i64> %a3za, %a3zb 925 %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm) 926 927 %a3sa = sext <8 x i8> undef to <8 x i64> 928 %a3sb = sext <8 x i8> undef to <8 x i64> 929 %a3sm = mul <8 x i64> %a3sa, %a3sb 930 %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm) 931 932 %a4za = zext <16 x i8> undef to <16 x i64> 933 %a4zb = zext <16 x i8> undef to <16 x i64> 934 %a4zm = mul <16 x i64> %a4za, %a4zb 935 %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm) 936 937 %a4sa = sext <16 x i8> undef to <16 x i64> 938 %a4sb = sext <16 x i8> undef to <16 x i64> 939 %a4sm = mul <16 x i64> %a4sa, %a4sb 940 %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x 
i64> %a4sm) 941 942 %a5za = zext <1 x i16> undef to <1 x i64> 943 %a5zb = zext <1 x i16> undef to <1 x i64> 944 %a5zm = mul <1 x i64> %a5za, %a5zb 945 %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm) 946 947 %a5sa = sext <1 x i16> undef to <1 x i64> 948 %a5sb = sext <1 x i16> undef to <1 x i64> 949 %a5sm = mul <1 x i64> %a5sa, %a5sb 950 %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm) 951 952 %a6za = zext <2 x i16> undef to <2 x i64> 953 %a6zb = zext <2 x i16> undef to <2 x i64> 954 %a6zm = mul <2 x i64> %a6za, %a6zb 955 %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm) 956 957 %a6sa = sext <2 x i16> undef to <2 x i64> 958 %a6sb = sext <2 x i16> undef to <2 x i64> 959 %a6sm = mul <2 x i64> %a6sa, %a6sb 960 %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm) 961 962 %a7za = zext <4 x i16> undef to <4 x i64> 963 %a7zb = zext <4 x i16> undef to <4 x i64> 964 %a7zm = mul <4 x i64> %a7za, %a7zb 965 %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm) 966 967 %a7sa = sext <4 x i16> undef to <4 x i64> 968 %a7sb = sext <4 x i16> undef to <4 x i64> 969 %a7sm = mul <4 x i64> %a7sa, %a7sb 970 %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm) 971 972 %a8za = zext <8 x i16> undef to <8 x i64> 973 %a8zb = zext <8 x i16> undef to <8 x i64> 974 %a8zm = mul <8 x i64> %a8za, %a8zb 975 %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm) 976 977 %a8sa = sext <8 x i16> undef to <8 x i64> 978 %a8sb = sext <8 x i16> undef to <8 x i64> 979 %a8sm = mul <8 x i64> %a8sa, %a8sb 980 %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm) 981 982 %a9za = zext <16 x i16> undef to <16 x i64> 983 %a9zb = zext <16 x i16> undef to <16 x i64> 984 %a9zm = mul <16 x i64> %a9za, %a9zb 985 %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm) 986 987 %a9sa = sext <16 x i16> undef to <16 x i64> 988 %a9sb = sext <16 x i16> undef to <16 x i64> 989 %a9sm = mul <16 x i64> %a9sa, %a9sb 990 %a9s = call i64 
@llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm) 991 992 %a10za = zext <1 x i32> undef to <1 x i64> 993 %a10zb = zext <1 x i32> undef to <1 x i64> 994 %a10zm = mul <1 x i64> %a10za, %a10zb 995 %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm) 996 997 %a10sa = sext <1 x i32> undef to <1 x i64> 998 %a10sb = sext <1 x i32> undef to <1 x i64> 999 %a10sm = mul <1 x i64> %a10sa, %a10sb 1000 %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm) 1001 1002 %a11za = zext <2 x i32> undef to <2 x i64> 1003 %a11zb = zext <2 x i32> undef to <2 x i64> 1004 %a11zm = mul <2 x i64> %a11za, %a11zb 1005 %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm) 1006 1007 %a11sa = sext <2 x i32> undef to <2 x i64> 1008 %a11sb = sext <2 x i32> undef to <2 x i64> 1009 %a11sm = mul <2 x i64> %a11sa, %a11sb 1010 %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm) 1011 1012 %a12za = zext <4 x i32> undef to <4 x i64> 1013 %a12zb = zext <4 x i32> undef to <4 x i64> 1014 %a12zm = mul <4 x i64> %a12za, %a12zb 1015 %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm) 1016 1017 %a12sa = sext <4 x i32> undef to <4 x i64> 1018 %a12sb = sext <4 x i32> undef to <4 x i64> 1019 %a12sm = mul <4 x i64> %a12sa, %a12sb 1020 %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm) 1021 1022 %a13za = zext <8 x i32> undef to <8 x i64> 1023 %a13zb = zext <8 x i32> undef to <8 x i64> 1024 %a13zm = mul <8 x i64> %a13za, %a13zb 1025 %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm) 1026 1027 %a13sa = sext <8 x i32> undef to <8 x i64> 1028 %a13sb = sext <8 x i32> undef to <8 x i64> 1029 %a13sm = mul <8 x i64> %a13sa, %a13sb 1030 %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm) 1031 1032 %a14za = zext <16 x i32> undef to <16 x i64> 1033 %a14zb = zext <16 x i32> undef to <16 x i64> 1034 %a14zm = mul <16 x i64> %a14za, %a14zb 1035 %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm) 1036 1037 %a14sa = sext 
<16 x i32> undef to <16 x i64> 1038 %a14sb = sext <16 x i32> undef to <16 x i64> 1039 %a14sm = mul <16 x i64> %a14sa, %a14sb 1040 %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm) 1041 1042 %a15m = mul <1 x i64> undef, undef 1043 %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m) 1044 1045 %a16m = mul <2 x i64> undef, undef 1046 %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m) 1047 1048 %a17m = mul <4 x i64> undef, undef 1049 %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m) 1050 1051 %a18m = mul <8 x i64> undef, undef 1052 %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m) 1053 1054 %a19m = mul <16 x i64> undef, undef 1055 %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m) 1056 1057 ret void 1058} 1059 1060declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) 1061declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>) 1062declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) 1063declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) 1064declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) 1065declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) 1066declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>) 1067declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) 1068declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) 1069declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) 1070declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) 1071declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>) 1072declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) 1073declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) 1074declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) 1075declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) 1076declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>) 1077declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) 1078declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) 1079declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) 1080 ; The preceding declarations cover @llvm.vector.reduce.add for every combination of element type (i8, i16, i32, i64) and lane count (1, 2, 4, 8, 16); each takes one vector operand and returns a scalar of the element type.