1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1 3; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2 4; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3 5; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT 6; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os 7; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz 8; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2 9; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2 10; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS 11; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 -enable-new-pm=1 | FileCheck %s --check-prefix=O1VEC2 12 13; This file tests the llvm.loop.vectorize.enable metadata forcing 14; vectorization even when optimization levels are too low, or when 15; vectorization is disabled. 16 17target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 18target triple = "x86_64-unknown-linux-gnu" 19 20define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { 21; O1-LABEL: @enabled( 22; O1-NEXT: entry: 23; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 24; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 25; O1-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 26; O1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 27; O1-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 28; O1-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 29; O1-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 30; O1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 31; O1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 32; O1-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 33; O1-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 34; O1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 35; O1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 36; O1-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 37; O1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 38; O1-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 39; O1-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 40; O1-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 41; O1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 42; O1-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 43; O1-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 44; O1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 45; O1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 46; O1-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 47; O1-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 48; O1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 49; O1-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 50; O1-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 51; O1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 52; O1-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 53; O1-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 54; O1-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 55; O1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 56; O1-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 57; O1-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 58; O1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 59; O1-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 60; O1-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 61; O1-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 62; O1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 63; O1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 64; O1-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 65; O1-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 66; O1-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 67; O1-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 68; O1-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 69; O1-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 70; O1-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 71; O1-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 72; O1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 73; O1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 74; O1-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 75; O1-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 76; O1-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 77; O1-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 78; O1-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 79; O1-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 80; O1-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 81; O1-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 82; O1-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 83; O1-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 84; O1-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 85; O1-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 86; O1-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 87; O1-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 88; O1-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 89; O1-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 90; O1-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 91; O1-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 92; O1-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 93; O1-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 94; O1-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 95; O1-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 96; O1-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 97; O1-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 98; O1-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 99; O1-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 100; O1-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 101; O1-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 102; O1-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 103; O1-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 104; O1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 105; O1-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 106; O1-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 107; O1-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 108; O1-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 109; O1-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 110; O1-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 111; O1-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 112; O1-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 113; O1-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 114; O1-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 115; O1-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 116; O1-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 117; O1-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 118; O1-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 119; O1-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 120; O1-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 121; O1-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 122; O1-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 123; O1-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 124; O1-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 125; O1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 126; O1-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 127; O1-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 128; O1-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 129; O1-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 130; O1-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 131; O1-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 132; O1-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 133; O1-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 134; O1-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 135; O1-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 136; O1-NEXT: ret i32 [[TMP78]] 137; 138; O2-LABEL: @enabled( 139; O2-NEXT: entry: 140; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 141; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 142; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 143; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 144; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 145; O2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 146; O2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 147; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 148; O2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 149; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 150; O2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 151; O2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 152; O2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 153; O2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 154; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 155; O2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 156; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 157; O2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 158; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 159; O2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 160; O2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 161; O2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 162; O2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 163; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 164; O2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 165; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 166; O2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 167; O2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 168; O2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 169; O2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 170; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 171; O2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 172; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 173; O2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 174; O2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 175; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 176; O2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 177; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 178; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 179; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 180; O2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 181; O2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 182; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 183; O2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 184; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 185; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 186; O2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 187; O2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 188; O2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 189; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 190; O2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 191; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 192; O2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 193; O2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 194; O2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 195; O2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 196; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 197; O2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 198; O2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 199; O2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 200; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 201; O2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 202; O2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 203; O2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 204; O2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 205; O2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 206; O2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 207; O2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 208; O2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 209; O2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 210; O2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 211; O2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 212; O2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 213; O2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 214; O2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 215; O2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 216; O2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 217; O2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 218; O2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 219; O2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 220; O2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 221; O2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 222; O2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 223; O2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 224; O2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 225; O2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 226; O2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 227; O2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 228; O2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 229; O2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 230; O2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 231; O2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 232; O2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 233; O2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 234; O2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 235; O2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 236; O2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 237; O2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 238; O2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 239; O2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 240; O2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 241; O2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 242; O2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 243; O2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 244; O2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 245; O2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 246; O2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 247; O2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 248; O2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 249; O2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 250; O2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 251; O2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 252; O2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 253; O2-NEXT: ret i32 [[TMP78]] 254; 255; O3-LABEL: @enabled( 256; O3-NEXT: entry: 257; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 258; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 259; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 260; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 261; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 262; O3-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 263; O3-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 264; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 265; O3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 266; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 267; O3-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 268; O3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 269; O3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 270; O3-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 271; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 272; O3-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 273; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 274; O3-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 275; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 276; O3-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 277; O3-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 278; O3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 279; O3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 280; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 281; O3-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 282; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 283; O3-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 284; O3-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 285; O3-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 286; O3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 287; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 288; O3-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 289; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 290; O3-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 291; O3-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 292; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 293; O3-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 294; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 295; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 296; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 297; O3-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 298; O3-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 299; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 300; O3-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 301; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 302; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 303; O3-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 304; O3-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 305; O3-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 306; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 307; O3-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 308; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 309; O3-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 310; O3-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 311; O3-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 312; O3-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 313; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 314; O3-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 315; O3-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 316; O3-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 317; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 318; O3-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 319; O3-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 320; O3-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 321; O3-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 322; O3-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 323; O3-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 324; O3-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 325; O3-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 326; O3-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 327; O3-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 328; O3-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 329; O3-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 330; O3-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 331; O3-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 332; O3-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 333; O3-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 334; O3-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 335; O3-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 336; O3-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 337; O3-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 338; O3-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 339; O3-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 340; O3-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 341; O3-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 342; O3-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 343; O3-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 344; O3-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 345; O3-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 346; O3-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 347; O3-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 348; O3-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 349; O3-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 350; O3-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 351; O3-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 352; O3-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 353; O3-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 354; O3-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 355; O3-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 356; O3-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 357; O3-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 358; O3-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 359; O3-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 360; O3-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 361; O3-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 362; O3-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 363; O3-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 364; O3-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 365; O3-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 366; O3-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 367; O3-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 368; O3-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 369; O3-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 370; O3-NEXT: ret i32 [[TMP78]] 371; 372; O3DEFAULT-LABEL: @enabled( 373; O3DEFAULT-NEXT: entry: 374; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 375; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 376; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 377; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 378; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 379; O3DEFAULT-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 380; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 381; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 382; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 383; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 384; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 385; O3DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 386; O3DEFAULT-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 387; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 388; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 389; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 390; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 391; O3DEFAULT-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 392; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 393; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 394; O3DEFAULT-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 395; O3DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 396; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 397; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 398; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 399; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 400; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 401; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 402; O3DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 403; O3DEFAULT-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 404; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 405; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 406; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 407; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 408; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 409; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 410; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 411; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 412; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 413; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 414; O3DEFAULT-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 415; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 416; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 417; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 418; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 419; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 420; O3DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 421; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 422; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 423; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 424; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 425; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 426; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 427; O3DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 428; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 429; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 430; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 431; O3DEFAULT-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 432; O3DEFAULT-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 433; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 434; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 435; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 436; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 437; O3DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 438; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 439; O3DEFAULT-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 440; O3DEFAULT-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 441; O3DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 442; O3DEFAULT-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 443; O3DEFAULT-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 444; O3DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 445; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 446; O3DEFAULT-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 447; O3DEFAULT-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 448; O3DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 449; O3DEFAULT-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 450; O3DEFAULT-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 451; O3DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 452; O3DEFAULT-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 453; O3DEFAULT-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 454; O3DEFAULT-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 455; O3DEFAULT-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 456; O3DEFAULT-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 457; O3DEFAULT-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 458; O3DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 459; O3DEFAULT-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 460; O3DEFAULT-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 461; O3DEFAULT-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 462; O3DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 463; O3DEFAULT-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 464; O3DEFAULT-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 465; O3DEFAULT-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 466; O3DEFAULT-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 467; O3DEFAULT-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 468; O3DEFAULT-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 469; O3DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 470; O3DEFAULT-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 471; O3DEFAULT-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 472; O3DEFAULT-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 473; O3DEFAULT-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 474; O3DEFAULT-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 475; O3DEFAULT-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 476; O3DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 477; O3DEFAULT-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 478; O3DEFAULT-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 479; O3DEFAULT-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 480; O3DEFAULT-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 481; O3DEFAULT-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 482; O3DEFAULT-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 483; O3DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 484; O3DEFAULT-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 485; O3DEFAULT-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 486; O3DEFAULT-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 487; O3DEFAULT-NEXT: ret i32 [[TMP78]] 488; 489; Os-LABEL: @enabled( 490; Os-NEXT: entry: 491; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 492; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 493; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 494; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 495; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 496; Os-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 497; Os-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 498; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 499; Os-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 500; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 501; Os-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 502; Os-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 503; Os-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 504; Os-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 505; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 506; Os-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 507; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 508; Os-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 509; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 510; Os-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 511; Os-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 512; Os-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 513; Os-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 514; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 515; Os-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 516; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 517; Os-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 518; Os-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 519; Os-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 520; Os-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 521; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 522; Os-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 523; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 524; Os-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 525; Os-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 526; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 527; Os-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 528; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 529; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 530; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 531; Os-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 532; Os-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 533; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 534; Os-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 535; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 536; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 537; Os-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 538; Os-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 539; Os-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 540; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 541; Os-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 542; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 543; Os-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 544; Os-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 545; Os-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 546; Os-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 547; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 548; Os-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 549; Os-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 550; Os-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 551; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 552; Os-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 553; Os-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 554; Os-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 555; Os-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 556; Os-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 557; Os-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 558; Os-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 559; Os-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 560; Os-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 561; Os-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 562; Os-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 563; Os-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 564; Os-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 565; Os-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 566; Os-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 567; Os-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 568; Os-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 569; Os-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 570; Os-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 571; Os-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 572; Os-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 573; Os-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 574; Os-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 575; Os-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 576; Os-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 577; Os-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 578; Os-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 579; Os-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 580; Os-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 581; Os-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 582; Os-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 583; Os-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 584; Os-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 585; Os-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 586; Os-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 587; Os-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 588; Os-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 589; Os-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 590; Os-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 591; Os-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 592; Os-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 593; Os-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 594; Os-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 595; Os-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 596; Os-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 597; Os-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 598; Os-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 599; Os-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 600; Os-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 601; Os-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 602; Os-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 603; Os-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 604; Os-NEXT: ret i32 [[TMP78]] 605; 606; Oz-LABEL: @enabled( 607; Oz-NEXT: entry: 608; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 609; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 610; Oz-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 611; Oz-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 612; Oz-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 613; Oz-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 614; Oz-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 615; Oz-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 616; Oz-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 617; Oz-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 618; Oz-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 619; Oz-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 620; Oz-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 621; Oz-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 622; Oz-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 623; Oz-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 624; Oz-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 625; Oz-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 626; Oz-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 627; Oz-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 628; Oz-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 629; Oz-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 630; Oz-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 631; Oz-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 632; Oz-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 633; Oz-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 634; Oz-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 635; Oz-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 636; Oz-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 637; Oz-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 638; Oz-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 639; Oz-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 640; Oz-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 641; Oz-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 642; Oz-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 643; Oz-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 644; Oz-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 645; Oz-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 646; Oz-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 647; Oz-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 648; Oz-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 649; Oz-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 650; Oz-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 651; Oz-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 652; Oz-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 653; Oz-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 654; Oz-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 655; Oz-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 656; Oz-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 657; Oz-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 658; Oz-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 659; Oz-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 660; Oz-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 661; Oz-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 662; Oz-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 663; Oz-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 664; Oz-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 665; Oz-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 666; Oz-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 667; Oz-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 668; Oz-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 669; Oz-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 670; Oz-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 671; Oz-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 672; Oz-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 673; Oz-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 674; Oz-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 675; Oz-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 676; Oz-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 677; Oz-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 678; Oz-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 679; Oz-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 680; Oz-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 681; Oz-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 682; Oz-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 683; Oz-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 684; Oz-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 685; Oz-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 686; Oz-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 687; Oz-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 688; Oz-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 689; Oz-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 690; Oz-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 691; Oz-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 692; Oz-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 693; Oz-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 694; Oz-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 695; Oz-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 696; Oz-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 697; Oz-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 698; Oz-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 699; Oz-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 700; Oz-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 701; Oz-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 702; Oz-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 703; Oz-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 704; Oz-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 705; Oz-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 706; Oz-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 707; Oz-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 708; Oz-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 709; Oz-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 710; Oz-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 711; Oz-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 712; Oz-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 713; Oz-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 714; Oz-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 715; Oz-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 716; Oz-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 717; Oz-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 718; Oz-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 719; Oz-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 720; Oz-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 721; Oz-NEXT: ret i32 [[TMP78]] 722; 723; O1VEC2-LABEL: @enabled( 724; O1VEC2-NEXT: entry: 725; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 726; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 727; O1VEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 728; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 729; O1VEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 730; O1VEC2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 731; O1VEC2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 732; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 733; O1VEC2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 734; O1VEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 735; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 736; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 737; O1VEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 738; O1VEC2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 739; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 740; O1VEC2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 741; O1VEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 742; O1VEC2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 743; O1VEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 744; O1VEC2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 745; O1VEC2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 746; O1VEC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 747; O1VEC2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 748; O1VEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 749; O1VEC2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 750; O1VEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 751; O1VEC2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 752; O1VEC2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 753; O1VEC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 754; O1VEC2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 755; O1VEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 756; O1VEC2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 757; O1VEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 758; O1VEC2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 759; O1VEC2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 760; O1VEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 761; O1VEC2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 762; O1VEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 763; O1VEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 764; O1VEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 765; O1VEC2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 766; O1VEC2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 767; O1VEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 768; O1VEC2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 769; O1VEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 770; O1VEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 771; O1VEC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 772; O1VEC2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 773; O1VEC2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 774; O1VEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 775; O1VEC2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 776; O1VEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 777; O1VEC2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 778; O1VEC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 779; O1VEC2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 780; O1VEC2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 781; O1VEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 782; O1VEC2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 783; O1VEC2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 784; O1VEC2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 785; O1VEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 786; O1VEC2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 787; O1VEC2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 788; O1VEC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 789; O1VEC2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 790; O1VEC2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 791; O1VEC2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 792; O1VEC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 793; O1VEC2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 794; O1VEC2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 795; O1VEC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 796; O1VEC2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 797; O1VEC2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 798; O1VEC2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 799; O1VEC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 800; O1VEC2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 801; O1VEC2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 802; O1VEC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 803; O1VEC2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 804; O1VEC2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 805; O1VEC2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 806; O1VEC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 807; O1VEC2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 808; O1VEC2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 809; O1VEC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 810; O1VEC2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 811; O1VEC2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 812; O1VEC2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 813; O1VEC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 814; O1VEC2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 815; O1VEC2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 816; O1VEC2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 817; O1VEC2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 818; O1VEC2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 819; O1VEC2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 820; O1VEC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 821; O1VEC2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 822; O1VEC2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 823; O1VEC2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 824; O1VEC2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 825; O1VEC2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 826; O1VEC2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 827; O1VEC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 828; O1VEC2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 829; O1VEC2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 830; O1VEC2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 831; O1VEC2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 832; O1VEC2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 833; O1VEC2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 834; O1VEC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 835; O1VEC2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 836; O1VEC2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 837; O1VEC2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 838; O1VEC2-NEXT: ret i32 [[TMP78]] 839; 840; OzVEC2-LABEL: @enabled( 841; OzVEC2-NEXT: entry: 842; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 843; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 844; OzVEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 845; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 846; OzVEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 847; OzVEC2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 848; OzVEC2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 849; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 850; OzVEC2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 851; OzVEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 852; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 853; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 854; OzVEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 855; OzVEC2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 856; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 857; OzVEC2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 858; OzVEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 859; OzVEC2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 860; OzVEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 861; OzVEC2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 862; OzVEC2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 863; OzVEC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 864; OzVEC2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 865; OzVEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 866; OzVEC2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 867; OzVEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 868; OzVEC2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 869; OzVEC2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 870; OzVEC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 871; OzVEC2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 872; OzVEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 873; OzVEC2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 874; OzVEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 875; OzVEC2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 876; OzVEC2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 877; OzVEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 878; OzVEC2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 879; OzVEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 880; OzVEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 881; OzVEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 882; OzVEC2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 883; OzVEC2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 884; OzVEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 885; OzVEC2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 886; OzVEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 887; OzVEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 888; OzVEC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 889; OzVEC2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 890; OzVEC2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 891; OzVEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 892; OzVEC2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 893; OzVEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 894; OzVEC2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 895; OzVEC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 896; OzVEC2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 897; OzVEC2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 898; OzVEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 899; OzVEC2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 900; OzVEC2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 901; OzVEC2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 902; OzVEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 903; OzVEC2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 904; OzVEC2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 905; OzVEC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 906; OzVEC2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 907; OzVEC2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 908; OzVEC2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 909; OzVEC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 910; OzVEC2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 911; OzVEC2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 912; OzVEC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 913; OzVEC2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 914; OzVEC2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 915; OzVEC2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 916; OzVEC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 917; OzVEC2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 918; OzVEC2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 919; OzVEC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 920; OzVEC2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 921; OzVEC2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 922; OzVEC2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 923; OzVEC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 924; OzVEC2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 925; OzVEC2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 926; OzVEC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 927; OzVEC2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 928; OzVEC2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 929; OzVEC2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 930; OzVEC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 931; OzVEC2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 932; OzVEC2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 933; OzVEC2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 934; OzVEC2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 935; OzVEC2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 936; OzVEC2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 937; OzVEC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 938; OzVEC2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 939; OzVEC2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 940; OzVEC2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 941; OzVEC2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 942; OzVEC2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 943; OzVEC2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 944; OzVEC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 945; OzVEC2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 946; OzVEC2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 947; OzVEC2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 948; OzVEC2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 949; OzVEC2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 950; OzVEC2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 951; OzVEC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 952; OzVEC2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 953; OzVEC2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 954; OzVEC2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 955; OzVEC2-NEXT: ret i32 [[TMP78]] 956; 957; O3DIS-LABEL: @enabled( 958; O3DIS-NEXT: entry: 959; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 960; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 961; O3DIS-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 962; O3DIS-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 963; O3DIS-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 964; O3DIS-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 965; O3DIS-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 966; O3DIS-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 967; O3DIS-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 968; O3DIS-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 969; O3DIS-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 970; O3DIS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 971; O3DIS-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 972; O3DIS-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 973; O3DIS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 974; O3DIS-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 975; O3DIS-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 976; O3DIS-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 977; O3DIS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 978; O3DIS-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 979; O3DIS-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 980; O3DIS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 981; O3DIS-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 982; O3DIS-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 983; O3DIS-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 984; O3DIS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 985; O3DIS-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 986; O3DIS-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 987; O3DIS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 988; O3DIS-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 989; O3DIS-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 990; O3DIS-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 991; O3DIS-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 992; O3DIS-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 993; O3DIS-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 994; O3DIS-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 995; O3DIS-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 996; O3DIS-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 997; O3DIS-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 998; O3DIS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 999; O3DIS-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1000; O3DIS-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1001; O3DIS-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1002; O3DIS-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1003; O3DIS-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1004; O3DIS-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1005; O3DIS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1006; O3DIS-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1007; O3DIS-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1008; O3DIS-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1009; O3DIS-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1010; O3DIS-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1011; O3DIS-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1012; O3DIS-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1013; O3DIS-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1014; O3DIS-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1015; O3DIS-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1016; O3DIS-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1017; O3DIS-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1018; O3DIS-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1019; O3DIS-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1020; O3DIS-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1021; O3DIS-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1022; O3DIS-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1023; O3DIS-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1024; O3DIS-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1025; O3DIS-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1026; O3DIS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1027; O3DIS-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1028; O3DIS-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1029; O3DIS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1030; O3DIS-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1031; O3DIS-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1032; O3DIS-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1033; O3DIS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1034; O3DIS-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1035; O3DIS-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1036; O3DIS-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1037; O3DIS-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1038; O3DIS-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1039; O3DIS-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1040; O3DIS-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1041; O3DIS-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1042; O3DIS-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1043; O3DIS-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1044; O3DIS-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1045; O3DIS-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1046; O3DIS-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1047; O3DIS-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1048; O3DIS-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1049; O3DIS-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1050; O3DIS-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1051; O3DIS-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1052; O3DIS-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1053; O3DIS-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1054; O3DIS-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1055; O3DIS-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1056; O3DIS-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1057; O3DIS-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1058; O3DIS-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1059; O3DIS-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1060; O3DIS-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1061; O3DIS-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1062; O3DIS-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1063; O3DIS-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1064; O3DIS-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1065; O3DIS-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1066; O3DIS-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1067; O3DIS-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1068; O3DIS-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1069; O3DIS-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1070; O3DIS-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1071; O3DIS-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1072; O3DIS-NEXT: ret i32 [[TMP78]] 1073; 1074entry: 1075 br label %for.body 1076 1077for.body: ; preds = %for.body, %entry 1078 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 1079 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 1080 %0 = load i32, i32* %arrayidx, align 4 1081 %add = add nsw i32 %0, %N 1082 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 1083 store i32 %add, i32* %arrayidx2, align 4 1084 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1085 %exitcond = icmp eq i64 %indvars.iv.next, 64 1086 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 1087 1088for.end: ; preds = %for.body 1089 %1 = load i32, i32* %a, align 4 1090 ret i32 %1 1091} 1092 1093define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { 1094; O1-LABEL: @nopragma( 1095; O1-NEXT: entry: 1096; O1-NEXT: br label [[FOR_BODY:%.*]] 1097; O1: for.body: 1098; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1099; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1100; O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1101; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1102; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1103; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1104; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1105; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1106; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1107; O1: for.end: 1108; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1109; O1-NEXT: ret i32 [[TMP1]] 1110; 1111; O2-LABEL: @nopragma( 1112; O2-NEXT: entry: 1113; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1114; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 1115; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1116; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1117; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1118; O2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1119; O2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1120; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1121; O2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1122; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1123; O2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1124; O2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1125; O2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1126; O2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1127; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1128; O2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1129; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1130; O2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1131; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1132; O2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1133; O2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1134; O2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1135; O2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1136; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1137; O2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1138; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1139; O2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1140; O2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1141; O2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1142; O2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1143; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1144; O2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1145; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1146; O2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1147; O2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1148; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1149; O2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1150; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1151; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1152; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1153; O2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1154; O2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1155; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1156; O2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1157; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1158; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1159; O2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1160; O2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1161; O2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1162; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1163; O2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1164; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1165; O2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1166; O2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1167; O2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1168; O2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1169; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1170; O2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1171; O2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1172; O2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1173; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1174; O2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1175; O2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1176; O2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1177; O2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1178; O2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1179; O2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1180; O2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1181; O2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1182; O2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1183; O2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1184; O2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1185; O2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1186; O2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1187; O2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1188; O2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1189; O2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1190; O2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1191; O2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1192; O2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1193; O2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1194; O2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1195; O2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1196; O2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1197; O2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1198; O2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1199; O2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1200; O2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1201; O2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1202; O2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1203; O2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1204; O2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1205; O2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1206; O2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1207; O2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1208; O2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1209; O2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1210; O2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1211; O2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1212; O2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1213; O2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1214; O2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1215; O2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1216; O2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1217; O2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1218; O2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1219; O2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1220; O2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1221; O2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1222; O2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1223; O2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1224; O2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1225; O2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1226; O2-NEXT: ret i32 [[TMP78]] 1227; 1228; O3-LABEL: @nopragma( 1229; O3-NEXT: entry: 1230; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1231; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 1232; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1233; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1234; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1235; O3-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1236; O3-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1237; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1238; O3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1239; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1240; O3-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1241; O3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1242; O3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1243; O3-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1244; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1245; O3-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1246; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1247; O3-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1248; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1249; O3-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1250; O3-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1251; O3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1252; O3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1253; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1254; O3-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1255; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1256; O3-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1257; O3-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1258; O3-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1259; O3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1260; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1261; O3-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1262; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1263; O3-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1264; O3-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1265; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1266; O3-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1267; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1268; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1269; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1270; O3-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1271; O3-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1272; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1273; O3-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1274; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1275; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1276; O3-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1277; O3-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1278; O3-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1279; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1280; O3-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1281; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1282; O3-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1283; O3-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1284; O3-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1285; O3-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1286; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1287; O3-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1288; O3-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1289; O3-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1290; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1291; O3-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1292; O3-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1293; O3-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1294; O3-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1295; O3-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1296; O3-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1297; O3-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1298; O3-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1299; O3-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1300; O3-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1301; O3-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1302; O3-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1303; O3-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1304; O3-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1305; O3-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1306; O3-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1307; O3-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1308; O3-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1309; O3-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1310; O3-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1311; O3-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1312; O3-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1313; O3-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1314; O3-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1315; O3-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1316; O3-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1317; O3-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1318; O3-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1319; O3-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1320; O3-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1321; O3-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1322; O3-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1323; O3-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1324; O3-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1325; O3-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1326; O3-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1327; O3-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1328; O3-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1329; O3-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1330; O3-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1331; O3-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1332; O3-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1333; O3-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1334; O3-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1335; O3-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1336; O3-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1337; O3-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1338; O3-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1339; O3-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1340; O3-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1341; O3-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1342; O3-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1343; O3-NEXT: ret i32 [[TMP78]] 1344; 1345; O3DEFAULT-LABEL: @nopragma( 1346; O3DEFAULT-NEXT: entry: 1347; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1348; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 1349; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1350; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1351; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1352; O3DEFAULT-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1353; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1354; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1355; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1356; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1357; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1358; O3DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1359; O3DEFAULT-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1360; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1361; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1362; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1363; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1364; O3DEFAULT-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1365; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1366; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1367; O3DEFAULT-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1368; O3DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1369; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1370; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1371; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1372; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1373; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1374; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1375; O3DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1376; O3DEFAULT-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1377; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1378; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1379; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1380; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1381; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1382; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1383; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1384; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1385; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1386; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1387; O3DEFAULT-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1388; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1389; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1390; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1391; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1392; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1393; O3DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1394; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1395; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1396; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1397; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1398; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1399; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1400; O3DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1401; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1402; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1403; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1404; O3DEFAULT-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1405; O3DEFAULT-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1406; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1407; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1408; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1409; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1410; O3DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1411; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1412; O3DEFAULT-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1413; O3DEFAULT-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1414; O3DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1415; O3DEFAULT-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1416; O3DEFAULT-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1417; O3DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1418; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1419; O3DEFAULT-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1420; O3DEFAULT-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1421; O3DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1422; O3DEFAULT-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1423; O3DEFAULT-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1424; O3DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1425; O3DEFAULT-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1426; O3DEFAULT-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1427; O3DEFAULT-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1428; O3DEFAULT-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1429; O3DEFAULT-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1430; O3DEFAULT-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1431; O3DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1432; O3DEFAULT-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1433; O3DEFAULT-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1434; O3DEFAULT-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1435; O3DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1436; O3DEFAULT-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1437; O3DEFAULT-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1438; O3DEFAULT-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1439; O3DEFAULT-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1440; O3DEFAULT-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1441; O3DEFAULT-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1442; O3DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1443; O3DEFAULT-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1444; O3DEFAULT-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1445; O3DEFAULT-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1446; O3DEFAULT-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1447; O3DEFAULT-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1448; O3DEFAULT-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1449; O3DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1450; O3DEFAULT-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1451; O3DEFAULT-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1452; O3DEFAULT-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1453; O3DEFAULT-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1454; O3DEFAULT-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1455; O3DEFAULT-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1456; O3DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1457; O3DEFAULT-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1458; O3DEFAULT-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1459; O3DEFAULT-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1460; O3DEFAULT-NEXT: ret i32 [[TMP78]] 1461; 1462; Os-LABEL: @nopragma( 1463; Os-NEXT: entry: 1464; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1465; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 1466; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1467; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1468; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1469; Os-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1470; Os-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1471; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1472; Os-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1473; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1474; Os-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1475; Os-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1476; Os-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1477; Os-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1478; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1479; Os-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1480; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1481; Os-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1482; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1483; Os-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1484; Os-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1485; Os-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1486; Os-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1487; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1488; Os-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1489; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1490; Os-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1491; Os-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1492; Os-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1493; Os-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1494; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1495; Os-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1496; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1497; Os-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1498; Os-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1499; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1500; Os-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1501; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1502; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1503; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1504; Os-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1505; Os-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1506; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1507; Os-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1508; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1509; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1510; Os-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1511; Os-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1512; Os-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1513; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1514; Os-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1515; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1516; Os-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1517; Os-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1518; Os-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1519; Os-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1520; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1521; Os-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1522; Os-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1523; Os-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1524; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1525; Os-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1526; Os-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1527; Os-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1528; Os-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1529; Os-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1530; Os-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1531; Os-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1532; Os-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1533; Os-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1534; Os-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1535; Os-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1536; Os-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1537; Os-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1538; Os-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1539; Os-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1540; Os-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1541; Os-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1542; Os-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1543; Os-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1544; Os-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1545; Os-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1546; Os-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1547; Os-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1548; Os-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1549; Os-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1550; Os-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1551; Os-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1552; Os-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1553; Os-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1554; Os-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1555; Os-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1556; Os-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1557; Os-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1558; Os-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1559; Os-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1560; Os-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1561; Os-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1562; Os-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1563; Os-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1564; Os-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1565; Os-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1566; Os-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1567; Os-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1568; Os-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1569; Os-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1570; Os-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1571; Os-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1572; Os-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1573; Os-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1574; Os-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1575; Os-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1576; Os-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1577; Os-NEXT: ret i32 [[TMP78]] 1578; 1579; Oz-LABEL: @nopragma( 1580; Oz-NEXT: entry: 1581; Oz-NEXT: br label [[FOR_BODY:%.*]] 1582; Oz: for.body: 1583; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1584; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1585; Oz-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1586; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1587; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1588; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1589; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1590; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1591; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1592; Oz: for.end: 1593; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1594; Oz-NEXT: ret i32 [[TMP1]] 1595; 1596; O1VEC2-LABEL: @nopragma( 1597; O1VEC2-NEXT: entry: 1598; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1599; O1VEC2: vector.ph: 1600; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1601; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 1602; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]] 1603; O1VEC2: vector.body: 1604; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1605; O1VEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1606; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] 1607; O1VEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 1608; O1VEC2-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 1609; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 1610; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1611; O1VEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 1612; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 1613; O1VEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1614; O1VEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4 1615; O1VEC2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 1616; O1VEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 1617; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 1618; O1VEC2: middle.block: 1619; O1VEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 1620; O1VEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1621; O1VEC2: scalar.ph: 1622; O1VEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1623; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] 1624; O1VEC2: for.body: 1625; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1626; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 1627; O1VEC2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1628; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[N]] 1629; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 1630; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1631; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1632; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1633; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 1634; O1VEC2: for.end: 1635; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4 1636; O1VEC2-NEXT: ret i32 [[TMP10]] 1637; 1638; OzVEC2-LABEL: @nopragma( 1639; OzVEC2-NEXT: entry: 1640; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1641; OzVEC2: vector.ph: 1642; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1643; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer 1644; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]] 1645; OzVEC2: vector.body: 1646; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1647; OzVEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1648; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] 1649; OzVEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 1650; OzVEC2-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 1651; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 1652; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1653; OzVEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 1654; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 1655; OzVEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1656; OzVEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4 1657; OzVEC2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 1658; OzVEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 1659; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 1660; OzVEC2: middle.block: 1661; OzVEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 1662; OzVEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1663; OzVEC2: scalar.ph: 1664; OzVEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1665; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] 1666; OzVEC2: for.body: 1667; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1668; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 1669; OzVEC2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1670; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[N]] 1671; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 1672; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1673; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1674; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1675; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 1676; OzVEC2: for.end: 1677; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4 1678; OzVEC2-NEXT: ret i32 [[TMP10]] 1679; 1680; O3DIS-LABEL: @nopragma( 1681; O3DIS-NEXT: entry: 1682; O3DIS-NEXT: br label [[FOR_BODY:%.*]] 1683; O3DIS: for.body: 1684; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1685; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1686; O3DIS-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1687; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1688; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1689; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1690; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1691; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1692; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1693; O3DIS: for.end: 1694; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1695; O3DIS-NEXT: ret i32 [[TMP1]] 1696; 1697entry: 1698 br label %for.body 1699 1700for.body: ; preds = %for.body, %entry 1701 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 1702 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 1703 %0 = load i32, i32* %arrayidx, align 4 1704 %add = add nsw i32 %0, %N 1705 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 1706 store i32 %add, i32* %arrayidx2, align 4 1707 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1708 %exitcond = icmp eq i64 %indvars.iv.next, 64 1709 br i1 %exitcond, label %for.end, label %for.body 1710 1711for.end: ; preds = %for.body 1712 %1 = load i32, i32* %a, align 4 1713 ret i32 %1 1714} 1715 1716define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { 1717; O1-LABEL: @disabled( 1718; O1-NEXT: entry: 1719; O1-NEXT: br label [[FOR_BODY:%.*]] 1720; O1: for.body: 1721; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1722; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1723; O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1724; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1725; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1726; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1727; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1728; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1729; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 1730; O1: for.end: 1731; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1732; O1-NEXT: ret i32 [[TMP1]] 1733; 1734; O2-LABEL: @disabled( 1735; O2-NEXT: entry: 1736; O2-NEXT: br label [[FOR_BODY:%.*]] 1737; O2: for.body: 1738; O2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1739; O2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1740; O2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1741; O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1742; O2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1743; O2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1744; O2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1745; O2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1746; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 1747; O2: for.end: 1748; O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1749; O2-NEXT: ret i32 [[TMP1]] 1750; 1751; O3-LABEL: @disabled( 1752; O3-NEXT: entry: 1753; O3-NEXT: br label [[FOR_BODY:%.*]] 1754; O3: for.body: 1755; O3-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1756; O3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1757; O3-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1758; O3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1759; O3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1760; O3-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1761; O3-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1762; O3-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1763; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 1764; O3: for.end: 1765; O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1766; O3-NEXT: ret i32 [[TMP1]] 1767; 1768; O3DEFAULT-LABEL: @disabled( 1769; O3DEFAULT-NEXT: entry: 1770; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1771; O3DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1772; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 1773; O3DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer 1774; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP3]] 1775; O3DEFAULT-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1776; O3DEFAULT-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 1777; O3DEFAULT-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1778; O3DEFAULT-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1779; O3DEFAULT-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX_4]] to <4 x i32>* 1780; O3DEFAULT-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 1781; O3DEFAULT-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP3]] 1782; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_4]] to <4 x i32>* 1783; O3DEFAULT-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4 1784; O3DEFAULT-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1785; O3DEFAULT-NEXT: [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1786; O3DEFAULT-NEXT: [[TMP10:%.*]] = bitcast i32* [[ARRAYIDX_8]] to <4 x i32>* 1787; O3DEFAULT-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 1788; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP11]], [[TMP3]] 1789; O3DEFAULT-NEXT: [[TMP13:%.*]] = bitcast i32* [[ARRAYIDX2_8]] to <4 x i32>* 1790; O3DEFAULT-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4 1791; O3DEFAULT-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1792; O3DEFAULT-NEXT: [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1793; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX_12]] to <4 x i32>* 1794; O3DEFAULT-NEXT: [[TMP15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1795; O3DEFAULT-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP15]], [[TMP3]] 1796; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX2_12]] to <4 x i32>* 1797; O3DEFAULT-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4 1798; O3DEFAULT-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1799; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1800; O3DEFAULT-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX_16]] to <4 x i32>* 1801; O3DEFAULT-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 1802; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP19]], [[TMP3]] 1803; O3DEFAULT-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX2_16]] to <4 x i32>* 1804; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP21]], align 4 1805; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1806; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1807; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX_20]] to <4 x i32>* 1808; O3DEFAULT-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4 1809; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP3]] 1810; O3DEFAULT-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX2_20]] to <4 x i32>* 1811; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP25]], align 4 1812; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1813; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1814; O3DEFAULT-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARRAYIDX_24]] to <4 x i32>* 1815; O3DEFAULT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32>* [[TMP26]], align 4 1816; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP27]], [[TMP3]] 1817; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[ARRAYIDX2_24]] to <4 x i32>* 1818; O3DEFAULT-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 4 1819; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1820; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1821; O3DEFAULT-NEXT: [[TMP30:%.*]] = bitcast i32* [[ARRAYIDX_28]] to <4 x i32>* 1822; O3DEFAULT-NEXT: [[TMP31:%.*]] = load <4 x i32>, <4 x i32>* [[TMP30]], align 4 1823; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP31]], [[TMP3]] 1824; O3DEFAULT-NEXT: [[TMP33:%.*]] = bitcast i32* [[ARRAYIDX2_28]] to <4 x i32>* 1825; O3DEFAULT-NEXT: store <4 x i32> [[TMP32]], <4 x i32>* [[TMP33]], align 4 1826; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1827; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1828; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[ARRAYIDX_32]] to <4 x i32>* 1829; O3DEFAULT-NEXT: [[TMP35:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1830; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP35]], [[TMP3]] 1831; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[ARRAYIDX2_32]] to <4 x i32>* 1832; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], <4 x i32>* [[TMP37]], align 4 1833; O3DEFAULT-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1834; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1835; O3DEFAULT-NEXT: [[TMP38:%.*]] = bitcast i32* [[ARRAYIDX_36]] to <4 x i32>* 1836; O3DEFAULT-NEXT: [[TMP39:%.*]] = load <4 x i32>, <4 x i32>* [[TMP38]], align 4 1837; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP3]] 1838; O3DEFAULT-NEXT: [[TMP41:%.*]] = bitcast i32* [[ARRAYIDX2_36]] to <4 x i32>* 1839; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP41]], align 4 1840; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1841; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1842; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[ARRAYIDX_40]] to <4 x i32>* 1843; O3DEFAULT-NEXT: [[TMP43:%.*]] = load <4 x i32>, <4 x i32>* [[TMP42]], align 4 1844; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP3]] 1845; O3DEFAULT-NEXT: [[TMP45:%.*]] = bitcast i32* [[ARRAYIDX2_40]] to <4 x i32>* 1846; O3DEFAULT-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP45]], align 4 1847; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1848; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1849; O3DEFAULT-NEXT: [[TMP46:%.*]] = bitcast i32* [[ARRAYIDX_44]] to <4 x i32>* 1850; O3DEFAULT-NEXT: [[TMP47:%.*]] = load <4 x i32>, <4 x i32>* [[TMP46]], align 4 1851; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP47]], [[TMP3]] 1852; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>* 1853; O3DEFAULT-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP49]], align 4 1854; O3DEFAULT-NEXT: [[TMP50:%.*]] = load i32, i32* [[A]], align 4 1855; O3DEFAULT-NEXT: ret i32 [[TMP50]] 1856; 1857; Os-LABEL: @disabled( 1858; Os-NEXT: entry: 1859; Os-NEXT: br label [[FOR_BODY:%.*]] 1860; Os: for.body: 1861; Os-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1862; Os-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1863; Os-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1864; Os-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1865; Os-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1866; Os-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1867; Os-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1868; Os-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1869; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 1870; Os: for.end: 1871; Os-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1872; Os-NEXT: ret i32 [[TMP1]] 1873; 1874; Oz-LABEL: @disabled( 1875; Oz-NEXT: entry: 1876; Oz-NEXT: br label [[FOR_BODY:%.*]] 1877; Oz: for.body: 1878; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1879; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1880; Oz-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1881; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1882; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1883; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1884; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1885; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1886; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 1887; Oz: for.end: 1888; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1889; Oz-NEXT: ret i32 [[TMP1]] 1890; 1891; O1VEC2-LABEL: @disabled( 1892; O1VEC2-NEXT: entry: 1893; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] 1894; O1VEC2: for.body: 1895; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1896; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1897; O1VEC2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1898; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1899; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1900; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1901; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1902; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1903; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !4 1904; O1VEC2: for.end: 1905; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1906; O1VEC2-NEXT: ret i32 [[TMP1]] 1907; 1908; OzVEC2-LABEL: @disabled( 1909; OzVEC2-NEXT: entry: 1910; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] 1911; OzVEC2: for.body: 1912; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1913; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1914; OzVEC2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1915; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1916; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1917; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1918; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1919; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1920; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !4 1921; OzVEC2: for.end: 1922; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1923; OzVEC2-NEXT: ret i32 [[TMP1]] 1924; 1925; O3DIS-LABEL: @disabled( 1926; O3DIS-NEXT: entry: 1927; O3DIS-NEXT: br label [[FOR_BODY:%.*]] 1928; O3DIS: for.body: 1929; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1930; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1931; O3DIS-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1932; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1933; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1934; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1935; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1936; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1937; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !0 1938; O3DIS: for.end: 1939; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1940; O3DIS-NEXT: ret i32 [[TMP1]] 1941; 1942entry: 1943 br label %for.body 1944 1945for.body: ; preds = %for.body, %entry 1946 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 1947 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 1948 %0 = load i32, i32* %arrayidx, align 4 1949 %add = add nsw i32 %0, %N 1950 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 1951 store i32 %add, i32* %arrayidx2, align 4 1952 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1953 %exitcond = icmp eq i64 %indvars.iv.next, 48 1954 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 1955 1956for.end: ; preds = %for.body 1957 %1 = load i32, i32* %a, align 4 1958 ret i32 %1 1959} 1960 1961!0 = !{!0, !1} 1962!1 = !{!"llvm.loop.vectorize.enable", i1 1} 1963!2 = !{!2, !3} 1964!3 = !{!"llvm.loop.vectorize.enable", i1 0} 1965