1; RUN: opt -loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_2 2; RUN: opt -loop-vectorize -force-vector-width=4 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_4 3; RUN: opt -loop-vectorize -force-vector-width=8 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_8 4; RUN: opt -loop-vectorize -force-vector-width=16 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_16 5; REQUIRES: asserts 6 7target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" 8target triple = "thumbv8.1m.main-none-eabi" 9 10; Factor 2 11 12%i8.2 = type {i8, i8} 13define void @i8_factor_2(%i8.2* %data, i64 %n) #0 { 14entry: 15 br label %for.body 16 17; VF_2-LABEL: Checking a loop in "i8_factor_2" 18; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 19; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 20; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 21; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 22; VF_4-LABEL: Checking a loop in "i8_factor_2" 23; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 24; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 25; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1 26; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 27; VF_8-LABEL: Checking a loop in "i8_factor_2" 28; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 29; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 
1 30; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 31; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 32; VF_16-LABEL: Checking a loop in "i8_factor_2" 33; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 34; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 35; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 36; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1 37for.body: 38 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 39 %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0 40 %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1 41 %tmp2 = load i8, i8* %tmp0, align 1 42 %tmp3 = load i8, i8* %tmp1, align 1 43 store i8 0, i8* %tmp0, align 1 44 store i8 0, i8* %tmp1, align 1 45 %i.next = add nuw nsw i64 %i, 1 46 %cond = icmp slt i64 %i.next, %n 47 br i1 %cond, label %for.body, label %for.end 48 49for.end: 50 ret void 51} 52 53%i16.2 = type {i16, i16} 54define void @i16_factor_2(%i16.2* %data, i64 %n) #0 { 55entry: 56 br label %for.body 57 58; VF_2-LABEL: Checking a loop in "i16_factor_2" 59; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 60; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 61; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 62; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 63; VF_4-LABEL: Checking a loop in "i16_factor_2" 64; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 65; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, 
i16* %tmp1, align 2 66; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 67; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 68; VF_8-LABEL: Checking a loop in "i16_factor_2" 69; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 70; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 71; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 72; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 73; VF_16-LABEL: Checking a loop in "i16_factor_2" 74; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 75; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 76; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 77; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2 78for.body: 79 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 80 %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0 81 %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1 82 %tmp2 = load i16, i16* %tmp0, align 2 83 %tmp3 = load i16, i16* %tmp1, align 2 84 store i16 0, i16* %tmp0, align 2 85 store i16 0, i16* %tmp1, align 2 86 %i.next = add nuw nsw i64 %i, 1 87 %cond = icmp slt i64 %i.next, %n 88 br i1 %cond, label %for.body, label %for.end 89 90for.end: 91 ret void 92} 93 94%i32.2 = type {i32, i32} 95define void @i32_factor_2(%i32.2* %data, i64 %n) #0 { 96entry: 97 br label %for.body 98 99; VF_2-LABEL: Checking a loop in "i32_factor_2" 100; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 101; VF_2-NEXT: Found an estimated cost of 0 
for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 102; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 103; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 104; VF_4-LABEL: Checking a loop in "i32_factor_2" 105; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 106; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 107; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 108; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 109; VF_8-LABEL: Checking a loop in "i32_factor_2" 110; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 111; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 112; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 113; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 114; VF_16-LABEL: Checking a loop in "i32_factor_2" 115; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 116; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 117; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 118; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4 119for.body: 120 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 121 %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0 122 %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1 123 %tmp2 = load i32, i32* %tmp0, align 4 124 %tmp3 = load i32, i32* %tmp1, align 4 125 
store i32 0, i32* %tmp0, align 4 126 store i32 0, i32* %tmp1, align 4 127 %i.next = add nuw nsw i64 %i, 1 128 %cond = icmp slt i64 %i.next, %n 129 br i1 %cond, label %for.body, label %for.end 130 131for.end: 132 ret void 133} 134 135%i64.2 = type {i64, i64} 136define void @i64_factor_2(%i64.2* %data, i64 %n) #0 { 137entry: 138 br label %for.body 139 140; VF_2-LABEL: Checking a loop in "i64_factor_2" 141; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 142; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 143; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 144; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 145; VF_4-LABEL: Checking a loop in "i64_factor_2" 146; VF_4: Found an estimated cost of 80 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 147; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 148; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 149; VF_4-NEXT: Found an estimated cost of 48 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 150; VF_8-LABEL: Checking a loop in "i64_factor_2" 151; VF_8: Found an estimated cost of 288 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 152; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 153; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 154; VF_8-NEXT: Found an estimated cost of 160 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 155; VF_16-LABEL: Checking a loop in "i64_factor_2" 156; VF_16: Found an estimated cost of 1088 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 157; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For 
instruction: %tmp3 = load i64, i64* %tmp1, align 8 158; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 159; VF_16-NEXT: Found an estimated cost of 576 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 160for.body: 161 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 162 %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0 163 %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1 164 %tmp2 = load i64, i64* %tmp0, align 8 165 %tmp3 = load i64, i64* %tmp1, align 8 166 store i64 0, i64* %tmp0, align 8 167 store i64 0, i64* %tmp1, align 8 168 %i.next = add nuw nsw i64 %i, 1 169 %cond = icmp slt i64 %i.next, %n 170 br i1 %cond, label %for.body, label %for.end 171 172for.end: 173 ret void 174} 175 176%f16.2 = type {half, half} 177define void @f16_factor_2(%f16.2* %data, i64 %n) #0 { 178entry: 179 br label %for.body 180 181; VF_2-LABEL: Checking a loop in "f16_factor_2" 182; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load half, half* %tmp0, align 2 183; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, half* %tmp1, align 2 184; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2 185; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 186; VF_4-LABEL: Checking a loop in "f16_factor_2" 187; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 188; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2 189; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 190; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 191; VF_8-LABEL: Checking a loop in "f16_factor_2" 192; VF_8: Found an estimated cost of 4 
for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2 193; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2 194; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 195; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2 196; VF_16-LABEL: Checking a loop in "f16_factor_2" 197; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, half* %tmp0, align 2 198; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, half* %tmp1, align 2 199; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2 200; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2 201for.body: 202 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 203 %tmp0 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 0 204 %tmp1 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 1 205 %tmp2 = load half, half* %tmp0, align 2 206 %tmp3 = load half, half* %tmp1, align 2 207 store half 0.0, half* %tmp0, align 2 208 store half 0.0, half* %tmp1, align 2 209 %i.next = add nuw nsw i64 %i, 1 210 %cond = icmp slt i64 %i.next, %n 211 br i1 %cond, label %for.body, label %for.end 212 213for.end: 214 ret void 215} 216 217%f32.2 = type {float, float} 218define void @f32_factor_2(%f32.2* %data, i64 %n) #0 { 219entry: 220 br label %for.body 221 222; VF_2-LABEL: Checking a loop in "f32_factor_2" 223; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4 224; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, float* %tmp1, align 4 225; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 226; VF_2-NEXT: Found an estimated 
cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 227; VF_4-LABEL: Checking a loop in "f32_factor_2" 228; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4 229; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, float* %tmp1, align 4 230; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4 231; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4 232; VF_8-LABEL: Checking a loop in "f32_factor_2" 233; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, float* %tmp0, align 4 234; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, float* %tmp1, align 4 235; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4 236; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 237; VF_16-LABEL: Checking a loop in "f32_factor_2" 238; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, float* %tmp0, align 4 239; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, float* %tmp1, align 4 240; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4 241; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4 242for.body: 243 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 244 %tmp0 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 0 245 %tmp1 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 1 246 %tmp2 = load float, float* %tmp0, align 4 247 %tmp3 = load float, float* %tmp1, align 4 248 store float 0.0, float* %tmp0, align 4 249 store float 0.0, 
float* %tmp1, align 4 250 %i.next = add nuw nsw i64 %i, 1 251 %cond = icmp slt i64 %i.next, %n 252 br i1 %cond, label %for.body, label %for.end 253 254for.end: 255 ret void 256} 257 258%f64.2 = type {double, double} 259define void @f64_factor_2(%f64.2* %data, i64 %n) #0 { 260entry: 261 br label %for.body 262 263; VF_2-LABEL: Checking a loop in "f64_factor_2" 264; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load double, double* %tmp0, align 8 265; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, double* %tmp1, align 8 266; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 267; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 268; VF_4-LABEL: Checking a loop in "f64_factor_2" 269; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load double, double* %tmp0, align 8 270; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, double* %tmp1, align 8 271; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 272; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 273; VF_8-LABEL: Checking a loop in "f64_factor_2" 274; VF_8: Found an estimated cost of 272 for VF 8 For instruction: %tmp2 = load double, double* %tmp0, align 8 275; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, double* %tmp1, align 8 276; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 277; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 278; VF_16-LABEL: Checking a loop in "f64_factor_2" 279; VF_16: Found an estimated cost of 1056 for VF 16 For instruction: 
%tmp2 = load double, double* %tmp0, align 8 280; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, double* %tmp1, align 8 281; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8 282; VF_16-NEXT: Found an estimated cost of 544 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8 283for.body: 284 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 285 %tmp0 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 0 286 %tmp1 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 1 287 %tmp2 = load double, double* %tmp0, align 8 288 %tmp3 = load double, double* %tmp1, align 8 289 store double 0.0, double* %tmp0, align 8 290 store double 0.0, double* %tmp1, align 8 291 %i.next = add nuw nsw i64 %i, 1 292 %cond = icmp slt i64 %i.next, %n 293 br i1 %cond, label %for.body, label %for.end 294 295for.end: 296 ret void 297} 298 299 300 301; Factor 3 302 303%i8.3 = type {i8, i8, i8} 304define void @i8_factor_3(%i8.3* %data, i64 %n) #0 { 305entry: 306 br label %for.body 307 308; VF_2-LABEL: Checking a loop in "i8_factor_3" 309; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 310; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 311; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 312; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 313; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 314; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1 315; VF_4-LABEL: Checking a loop in "i8_factor_3" 316; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 317; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For 
instruction: %tmp4 = load i8, i8* %tmp1, align 1 318; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 319; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1 320; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 321; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1 322; VF_8-LABEL: Checking a loop in "i8_factor_3" 323; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 324; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 325; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 326; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 327; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 328; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1 329; VF_16-LABEL: Checking a loop in "i8_factor_3" 330; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 331; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 332; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 333; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 334; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1 335; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1 336for.body: 337 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 338 %tmp0 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 0 339 %tmp1 = getelementptr 
inbounds %i8.3, %i8.3* %data, i64 %i, i32 1 340 %tmp2 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 2 341 %tmp3 = load i8, i8* %tmp0, align 1 342 %tmp4 = load i8, i8* %tmp1, align 1 343 %tmp5 = load i8, i8* %tmp2, align 1 344 store i8 0, i8* %tmp0, align 1 345 store i8 0, i8* %tmp1, align 1 346 store i8 0, i8* %tmp2, align 1 347 %i.next = add nuw nsw i64 %i, 1 348 %cond = icmp slt i64 %i.next, %n 349 br i1 %cond, label %for.body, label %for.end 350 351for.end: 352 ret void 353} 354 355%i16.3 = type {i16, i16, i16} 356define void @i16_factor_3(%i16.3* %data, i64 %n) #0 { 357entry: 358 br label %for.body 359 360; VF_2-LABEL: Checking a loop in "i16_factor_3" 361; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 362; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 363; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 364; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 365; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 366; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2 367; VF_4-LABEL: Checking a loop in "i16_factor_3" 368; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 369; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 370; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 371; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 372; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 373; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i16 0, i16* %tmp2, align 
2 374; VF_8-LABEL: Checking a loop in "i16_factor_3" 375; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 376; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 377; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 378; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 379; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 380; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2 381; VF_16-LABEL: Checking a loop in "i16_factor_3" 382; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 383; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 384; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 385; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 386; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2 387; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2 388for.body: 389 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 390 %tmp0 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 0 391 %tmp1 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 1 392 %tmp2 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 2 393 %tmp3 = load i16, i16* %tmp0, align 2 394 %tmp4 = load i16, i16* %tmp1, align 2 395 %tmp5 = load i16, i16* %tmp2, align 2 396 store i16 0, i16* %tmp0, align 2 397 store i16 0, i16* %tmp1, align 2 398 store i16 0, i16* %tmp2, align 2 399 %i.next = add nuw nsw i64 %i, 1 400 %cond = icmp slt i64 %i.next, %n 401 br i1 
%cond, label %for.body, label %for.end 402 403for.end: 404 ret void 405} 406 407%i32.3 = type {i32, i32, i32} 408define void @i32_factor_3(%i32.3* %data, i64 %n) #0 { 409entry: 410 br label %for.body 411 412; VF_2-LABEL: Checking a loop in "i32_factor_3" 413; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 414; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 415; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 416; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 417; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 418; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4 419; VF_4-LABEL: Checking a loop in "i32_factor_3" 420; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 421; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 422; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 423; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 424; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 425; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4 426; VF_8-LABEL: Checking a loop in "i32_factor_3" 427; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 428; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 429; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 430; VF_8-NEXT: Found an estimated cost of 0 
for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 431; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 432; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4 433; VF_16-LABEL: Checking a loop in "i32_factor_3" 434; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 435; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 436; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 437; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 438; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4 439; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4 440for.body: 441 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 442 %tmp0 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 0 443 %tmp1 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 1 444 %tmp2 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 2 445 %tmp3 = load i32, i32* %tmp0, align 4 446 %tmp4 = load i32, i32* %tmp1, align 4 447 %tmp5 = load i32, i32* %tmp2, align 4 448 store i32 0, i32* %tmp0, align 4 449 store i32 0, i32* %tmp1, align 4 450 store i32 0, i32* %tmp2, align 4 451 %i.next = add nuw nsw i64 %i, 1 452 %cond = icmp slt i64 %i.next, %n 453 br i1 %cond, label %for.body, label %for.end 454 455for.end: 456 ret void 457} 458 459%i64.3 = type {i64, i64, i64} 460define void @i64_factor_3(%i64.3* %data, i64 %n) #0 { 461entry: 462 br label %for.body 463 464; VF_2-LABEL: Checking a loop in "i64_factor_3" 465; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 466; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For 
instruction: %tmp4 = load i64, i64* %tmp1, align 8 467; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 468; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 469; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 470; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8 471; VF_4-LABEL: Checking a loop in "i64_factor_3" 472; VF_4: Found an estimated cost of 120 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 473; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 474; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 475; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 476; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 477; VF_4-NEXT: Found an estimated cost of 72 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8 478; VF_8-LABEL: Checking a loop in "i64_factor_3" 479; VF_8: Found an estimated cost of 432 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 480; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 481; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 482; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 483; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 484; VF_8-NEXT: Found an estimated cost of 240 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8 485; VF_16-LABEL: Checking a loop in "i64_factor_3" 486; VF_16: Found an estimated cost of 1632 for VF 16 For instruction: %tmp3 = load i64, i64* 
%tmp0, align 8 487; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 488; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 489; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 490; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 491; VF_16-NEXT: Found an estimated cost of 864 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8 492for.body: 493 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 494 %tmp0 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 0 495 %tmp1 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 1 496 %tmp2 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 2 497 %tmp3 = load i64, i64* %tmp0, align 8 498 %tmp4 = load i64, i64* %tmp1, align 8 499 %tmp5 = load i64, i64* %tmp2, align 8 500 store i64 0, i64* %tmp0, align 8 501 store i64 0, i64* %tmp1, align 8 502 store i64 0, i64* %tmp2, align 8 503 %i.next = add nuw nsw i64 %i, 1 504 %cond = icmp slt i64 %i.next, %n 505 br i1 %cond, label %for.body, label %for.end 506 507for.end: 508 ret void 509}
; NOTE(review): the line above is the still-collapsed tail of @i64_factor_3,
; whose header lies before this chunk; it is carried over unchanged.

; Three-field (factor 3) struct of half. The loop walks %data as an array of
; %f16.3, loading and then zeroing all three fields of element %i on every
; iteration. The directives inside the function pin the per-instruction cost
; lines expected from the loop-vectorize debug output at forced vector widths
; 2, 4, 8 and 16; in each group only the first load and the last store are
; expected to carry a non-zero cost.
; NOTE(review): attribute group #0 is defined outside this chunk.
%f16.3 = type {half, half, half}
define void @f16_factor_3(%f16.3* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "f16_factor_3"
; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load half, half* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, half* %tmp1, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp2, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_4-LABEL: Checking a loop in "f16_factor_3"
; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load half, half* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, half* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_8-LABEL: Checking a loop in "f16_factor_3"
; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load half, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_16-LABEL: Checking a loop in "f16_factor_3"
; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load half, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the three fields of %data[%i].
  %tmp0 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 2
  ; Read every field; the loaded values have no users in this function.
  %tmp3 = load half, half* %tmp0, align 2
  %tmp4 = load half, half* %tmp1, align 2
  %tmp5 = load half, half* %tmp2, align 2
  ; Overwrite every field with zero.
  store half 0.0, half* %tmp0, align 2
  store half 0.0, half* %tmp1, align 2
  store half 0.0, half* %tmp2, align 2
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Three-field (factor 3) struct of float. Same loop shape as the other
; factor-3 tests: load, then zero, all three fields of %data[%i]. The
; directives pin the expected per-instruction cost lines at forced vector
; widths 2, 4, 8 and 16, with a non-zero cost expected only on the first load
; and the last store of each group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%f32.3 = type {float, float, float}
define void @f32_factor_3(%f32.3* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "f32_factor_3"
; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load float, float* %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, float* %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp2, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
; VF_4-LABEL: Checking a loop in "f32_factor_3"
; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, float* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp2, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
; VF_8-LABEL: Checking a loop in "f32_factor_3"
; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, float* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp2, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4
; VF_16-LABEL: Checking a loop in "f32_factor_3"
; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load float, float* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, float* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp2, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the three fields of %data[%i].
  %tmp0 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 2
  ; Read every field; the loaded values have no users in this function.
  %tmp3 = load float, float* %tmp0, align 4
  %tmp4 = load float, float* %tmp1, align 4
  %tmp5 = load float, float* %tmp2, align 4
  ; Overwrite every field with zero.
  store float 0.0, float* %tmp0, align 4
  store float 0.0, float* %tmp1, align 4
  store float 0.0, float* %tmp2, align 4
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Three-field (factor 3) struct of double. Same loop shape as the other
; factor-3 tests: load, then zero, all three fields of %data[%i]. The
; directives pin the expected per-instruction cost lines at forced vector
; widths 2, 4, 8 and 16, with a non-zero cost expected only on the first load
; and the last store of each group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%f64.3 = type {double, double, double}
define void @f64_factor_3(%f64.3* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "f64_factor_3"
; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load double, double* %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, double* %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp2, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8
; VF_4-LABEL: Checking a loop in "f64_factor_3"
; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load double, double* %tmp0, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, double* %tmp1, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp2, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8
; VF_8-LABEL: Checking a loop in "f64_factor_3"
; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load double, double* %tmp0, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, double* %tmp1, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp2, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8
; VF_16-LABEL: Checking a loop in "f64_factor_3"
; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load double, double* %tmp0, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, double* %tmp1, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp2, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the three fields of %data[%i].
  %tmp0 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 2
  ; Read every field; the loaded values have no users in this function.
  %tmp3 = load double, double* %tmp0, align 8
  %tmp4 = load double, double* %tmp1, align 8
  %tmp5 = load double, double* %tmp2, align 8
  ; Overwrite every field with zero.
  store double 0.0, double* %tmp0, align 8
  store double 0.0, double* %tmp1, align 8
  store double 0.0, double* %tmp2, align 8
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}


; Factor 4

; Four-field (factor 4) struct of i8. The loop walks %data as an array of
; %i8.4, loading and then zeroing all four fields of element %i on every
; iteration. The directives pin the expected per-instruction cost lines from
; the loop-vectorize debug output at forced vector widths 2, 4, 8 and 16, with
; a non-zero cost expected only on the first load and the last store of each
; group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%i8.4 = type {i8, i8, i8, i8}
define void @i8_factor_4(%i8.4* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "i8_factor_4"
; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i8 0, i8* %tmp3, align 1
; VF_4-LABEL: Checking a loop in "i8_factor_4"
; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i8 0, i8* %tmp3, align 1
; VF_8-LABEL: Checking a loop in "i8_factor_4"
; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1
; VF_16-LABEL: Checking a loop in "i8_factor_4"
; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the four fields of %data[%i].
  %tmp0 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 2
  %tmp3 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 3
  ; Read every field; the loaded values have no users in this function.
  %tmp4 = load i8, i8* %tmp0, align 1
  %tmp5 = load i8, i8* %tmp1, align 1
  %tmp6 = load i8, i8* %tmp2, align 1
  %tmp7 = load i8, i8* %tmp3, align 1
  ; Overwrite every field with zero.
  store i8 0, i8* %tmp0, align 1
  store i8 0, i8* %tmp1, align 1
  store i8 0, i8* %tmp2, align 1
  store i8 0, i8* %tmp3, align 1
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Four-field (factor 4) struct of i16. Same loop shape as the other factor-4
; tests: load, then zero, all four fields of %data[%i]. The directives pin the
; expected per-instruction cost lines at forced vector widths 2, 4, 8 and 16,
; with a non-zero cost expected only on the first load and the last store of
; each group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%i16.4 = type {i16, i16, i16, i16}
define void @i16_factor_4(%i16.4* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "i16_factor_4"
; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i16 0, i16* %tmp3, align 2
; VF_4-LABEL: Checking a loop in "i16_factor_4"
; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2
; VF_8-LABEL: Checking a loop in "i16_factor_4"
; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2
; VF_16-LABEL: Checking a loop in "i16_factor_4"
; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the four fields of %data[%i].
  %tmp0 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 2
  %tmp3 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 3
  ; Read every field; the loaded values have no users in this function.
  %tmp4 = load i16, i16* %tmp0, align 2
  %tmp5 = load i16, i16* %tmp1, align 2
  %tmp6 = load i16, i16* %tmp2, align 2
  %tmp7 = load i16, i16* %tmp3, align 2
  ; Overwrite every field with zero.
  store i16 0, i16* %tmp0, align 2
  store i16 0, i16* %tmp1, align 2
  store i16 0, i16* %tmp2, align 2
  store i16 0, i16* %tmp3, align 2
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Four-field (factor 4) struct of i32. Same loop shape as the other factor-4
; tests: load, then zero, all four fields of %data[%i]. The directives pin the
; expected per-instruction cost lines at forced vector widths 2, 4, 8 and 16,
; with a non-zero cost expected only on the first load and the last store of
; each group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%i32.4 = type {i32, i32, i32, i32}
define void @i32_factor_4(%i32.4* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "i32_factor_4"
; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4
; VF_4-LABEL: Checking a loop in "i32_factor_4"
; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
; VF_8-LABEL: Checking a loop in "i32_factor_4"
; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4
; VF_16-LABEL: Checking a loop in "i32_factor_4"
; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the four fields of %data[%i].
  %tmp0 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 2
  %tmp3 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 3
  ; Read every field; the loaded values have no users in this function.
  %tmp4 = load i32, i32* %tmp0, align 4
  %tmp5 = load i32, i32* %tmp1, align 4
  %tmp6 = load i32, i32* %tmp2, align 4
  %tmp7 = load i32, i32* %tmp3, align 4
  ; Overwrite every field with zero.
  store i32 0, i32* %tmp0, align 4
  store i32 0, i32* %tmp1, align 4
  store i32 0, i32* %tmp2, align 4
  store i32 0, i32* %tmp3, align 4
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Four-field (factor 4) struct of i64. Same loop shape as the other factor-4
; tests: load, then zero, all four fields of %data[%i]. The directives pin the
; expected per-instruction cost lines at forced vector widths 2, 4, 8 and 16,
; with a non-zero cost expected only on the first load and the last store of
; each group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%i64.4 = type {i64, i64, i64, i64}
define void @i64_factor_4(%i64.4* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "i64_factor_4"
; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8
; VF_2-NEXT: Found an estimated cost of 32 for VF 2 For instruction: store i64 0, i64* %tmp3, align 8
; VF_4-LABEL: Checking a loop in "i64_factor_4"
; VF_4: Found an estimated cost of 160 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8
; VF_4-NEXT: Found an estimated cost of 96 for VF 4 For instruction: store i64 0, i64* %tmp3, align 8
; VF_8-LABEL: Checking a loop in "i64_factor_4"
; VF_8: Found an estimated cost of 576 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8
; VF_8-NEXT: Found an estimated cost of 320 for VF 8 For instruction: store i64 0, i64* %tmp3, align 8
; VF_16-LABEL: Checking a loop in "i64_factor_4"
; VF_16: Found an estimated cost of 2176 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8
; VF_16-NEXT: Found an estimated cost of 1152 for VF 16 For instruction: store i64 0, i64* %tmp3, align 8
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the four fields of %data[%i].
  %tmp0 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 2
  %tmp3 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 3
  ; Read every field; the loaded values have no users in this function.
  %tmp4 = load i64, i64* %tmp0, align 8
  %tmp5 = load i64, i64* %tmp1, align 8
  %tmp6 = load i64, i64* %tmp2, align 8
  %tmp7 = load i64, i64* %tmp3, align 8
  ; Overwrite every field with zero.
  store i64 0, i64* %tmp0, align 8
  store i64 0, i64* %tmp1, align 8
  store i64 0, i64* %tmp2, align 8
  store i64 0, i64* %tmp3, align 8
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Four-field (factor 4) struct of half. Same loop shape as the other factor-4
; tests: load, then zero, all four fields of %data[%i]. The directives pin the
; expected per-instruction cost lines at forced vector widths 2, 4, 8 and 16,
; with a non-zero cost expected only on the first load and the last store of
; each group.
; NOTE(review): attribute group #0 is defined outside this chunk.
%f16.4 = type {half, half, half, half}
define void @f16_factor_4(%f16.4* %data, i64 %n) #0 {
entry:
  br label %for.body

; VF_2-LABEL: Checking a loop in "f16_factor_4"
; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load half, half* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp1, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, half* %tmp2, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, half* %tmp3, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store half 0xH0000, half* %tmp3, align 2
; VF_4-LABEL: Checking a loop in "f16_factor_4"
; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load half, half* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, half* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, half* %tmp3, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2
; VF_8-LABEL: Checking a loop in "f16_factor_4"
; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, half* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, half* %tmp3, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2
; VF_16-LABEL: Checking a loop in "f16_factor_4"
; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, half* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, half* %tmp3, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2
for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of the four fields of %data[%i].
  %tmp0 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 0
  %tmp1 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 1
  %tmp2 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 2
  %tmp3 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 3
  ; Read every field; the loaded values have no users in this function.
  %tmp4 = load half, half* %tmp0, align 2
  %tmp5 = load half, half* %tmp1, align 2
  %tmp6 = load half, half* %tmp2, align 2
  %tmp7 = load half, half* %tmp3, align 2
  ; Overwrite every field with zero.
  store half 0.0, half* %tmp0, align 2
  store half 0.0, half* %tmp1, align 2
  store half 0.0, half* %tmp2, align 2
  store half 0.0, half* %tmp3, align 2
  ; Advance and keep looping while %i.next < %n (signed compare).
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; NOTE(review): the line below is the still-collapsed head of @f32_factor_4,
; which continues past this chunk; it is carried over unchanged.
985%f32.4 = type {float, float, float, float} 986define void @f32_factor_4(%f32.4* %data, i64 %n) #0 { 987entry: 988 br label %for.body 989 990; VF_2-LABEL: Checking a loop in "f32_factor_4" 991; VF_2: Found an
estimated cost of 40 for VF 2 For instruction: %tmp4 = load float, float* %tmp0, align 4 992; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp1, align 4 993; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, float* %tmp2, align 4 994; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, float* %tmp3, align 4 995; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 996; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 997; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4 998; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4 999; VF_4-LABEL: Checking a loop in "f32_factor_4" 1000; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4 1001; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4 1002; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, float* %tmp2, align 4 1003; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, float* %tmp3, align 4 1004; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4 1005; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4 1006; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4 1007; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4 1008; VF_8-LABEL: Checking a loop in "f32_factor_4" 1009; VF_8: Found an estimated cost of 544 for VF 8 For 
instruction: %tmp4 = load float, float* %tmp0, align 4 1010; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4 1011; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, float* %tmp2, align 4 1012; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, float* %tmp3, align 4 1013; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4 1014; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 1015; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4 1016; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4 1017; VF_16-LABEL: Checking a loop in "f32_factor_4" 1018; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4 1019; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp1, align 4 1020; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, float* %tmp2, align 4 1021; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, float* %tmp3, align 4 1022; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4 1023; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4 1024; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4 1025; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4 1026for.body: 1027 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 1028 %tmp0 = getelementptr inbounds %f32.4, 
%f32.4* %data, i64 %i, i32 0 1029 %tmp1 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 1 1030 %tmp2 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 2 1031 %tmp3 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 3 1032 %tmp4 = load float, float* %tmp0, align 4 1033 %tmp5 = load float, float* %tmp1, align 4 1034 %tmp6 = load float, float* %tmp2, align 4 1035 %tmp7 = load float, float* %tmp3, align 4 1036 store float 0.0, float* %tmp0, align 4 1037 store float 0.0, float* %tmp1, align 4 1038 store float 0.0, float* %tmp2, align 4 1039 store float 0.0, float* %tmp3, align 4 1040 %i.next = add nuw nsw i64 %i, 1 1041 %cond = icmp slt i64 %i.next, %n 1042 br i1 %cond, label %for.body, label %for.end 1043 1044for.end: 1045 ret void 1046} 1047 1048%f64.4 = type {double, double, double, double} 1049define void @f64_factor_4(%f64.4* %data, i64 %n) #0 { 1050entry: 1051 br label %for.body 1052 1053; VF_2-LABEL: Checking a loop in "f64_factor_4" 1054; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load double, double* %tmp0, align 8 1055; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp1, align 8 1056; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, double* %tmp2, align 8 1057; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, double* %tmp3, align 8 1058; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 1059; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 1060; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8 1061; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store double 0.000000e+00, double* %tmp3, align 8 1062; VF_4-LABEL: Checking a loop in "f64_factor_4" 1063; VF_4: Found 
an estimated cost of 144 for VF 4 For instruction: %tmp4 = load double, double* %tmp0, align 8 1064; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp1, align 8 1065; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, double* %tmp2, align 8 1066; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, double* %tmp3, align 8 1067; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 1068; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 1069; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8 1070; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store double 0.000000e+00, double* %tmp3, align 8 1071; VF_8-LABEL: Checking a loop in "f64_factor_4" 1072; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load double, double* %tmp0, align 8 1073; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp1, align 8 1074; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, double* %tmp2, align 8 1075; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, double* %tmp3, align 8 1076; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 1077; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 1078; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8 1079; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store double 0.000000e+00, double* %tmp3, align 8 1080; VF_16-LABEL: Checking a loop in "f64_factor_4" 1081; 
VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load double, double* %tmp0, align 8 1082; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp1, align 8 1083; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, double* %tmp2, align 8 1084; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, double* %tmp3, align 8 1085; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8 1086; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8 1087; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8 1088; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store double 0.000000e+00, double* %tmp3, align 8 1089for.body: 1090 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 1091 %tmp0 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 0 1092 %tmp1 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 1 1093 %tmp2 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 2 1094 %tmp3 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 3 1095 %tmp4 = load double, double* %tmp0, align 8 1096 %tmp5 = load double, double* %tmp1, align 8 1097 %tmp6 = load double, double* %tmp2, align 8 1098 %tmp7 = load double, double* %tmp3, align 8 1099 store double 0.0, double* %tmp0, align 8 1100 store double 0.0, double* %tmp1, align 8 1101 store double 0.0, double* %tmp2, align 8 1102 store double 0.0, double* %tmp3, align 8 1103 %i.next = add nuw nsw i64 %i, 1 1104 %cond = icmp slt i64 %i.next, %n 1105 br i1 %cond, label %for.body, label %for.end 1106 1107for.end: 1108 ret void 1109} 1110 1111attributes #0 = { "target-features"="+mve.fp" } 1112