; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"

; Test if the budget for the scheduling region size works.
; We test with a reduced budget of 16 which should prevent vectorizing the loads.

declare void @unknown()

define void @test(float * %a, float * %b, float * %c, float * %d) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L0:%.*]] = load float, float* [[A:%.*]], align 4
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
; CHECK-NEXT: [[L1:%.*]] = load float, float* [[A1]], align 4
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
; CHECK-NEXT: [[L2:%.*]] = load float, float* [[A2]], align 4
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
; CHECK-NEXT: [[L3:%.*]] = load float, float* [[A3]], align 4
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: store float [[L0]], float* [[B:%.*]], align 4
; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B]], i64 1
; CHECK-NEXT: store float [[L1]], float* [[B1]], align 4
; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT: store float [[L2]], float* [[B2]], align 4
; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: store float [[L3]], float* [[B3]], align 4
; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 1
; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds float, float* [[C]], i64 2
; CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds float, float* [[C]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[C]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 1
; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds float, float* [[D]], i64 2
; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds float, float* [[D]], i64 3
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[D]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; Don't vectorize these loads.
  %l0 = load float, float* %a
  %a1 = getelementptr inbounds float, float* %a, i64 1
  %l1 = load float, float* %a1
  %a2 = getelementptr inbounds float, float* %a, i64 2
  %l2 = load float, float* %a2
  %a3 = getelementptr inbounds float, float* %a, i64 3
  %l3 = load float, float* %a3

  ; Some unrelated instructions in between to enlarge the scheduling region.
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()

  ; Don't vectorize these stores because their operands are too far away.
  store float %l0, float* %b
  %b1 = getelementptr inbounds float, float* %b, i64 1
  store float %l1, float* %b1
  %b2 = getelementptr inbounds float, float* %b, i64 2
  store float %l2, float* %b2
  %b3 = getelementptr inbounds float, float* %b, i64 3
  store float %l3, float* %b3

  ; But still vectorize the following instructions, because even if the budget
  ; is exceeded, there is a minimum region size.
  %l4 = load float, float* %c
  %c1 = getelementptr inbounds float, float* %c, i64 1
  %l5 = load float, float* %c1
  %c2 = getelementptr inbounds float, float* %c, i64 2
  %l6 = load float, float* %c2
  %c3 = getelementptr inbounds float, float* %c, i64 3
  %l7 = load float, float* %c3

  store float %l4, float* %d
  %d1 = getelementptr inbounds float, float* %d, i64 1
  store float %l5, float* %d1
  %d2 = getelementptr inbounds float, float* %d, i64 2
  store float %l6, float* %d2
  %d3 = getelementptr inbounds float, float* %d, i64 3
  store float %l7, float* %d3

  ret void
}
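
; Aside: -slp-schedule-budget limits how many instructions the SLP
; vectorizer's scheduler will take into one scheduling region, which is why
; the 28 @unknown() calls above push the b-stores out of reach of the a-loads.
; As a sketch (the budget value of 1000 is illustrative, not taken from this
; test), re-running with a larger budget should let the first load/store
; group vectorize as well:
;
;   opt < %s -basic-aa -slp-vectorizer -S -slp-schedule-budget=1000 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx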