; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
; RUN: opt -S -argpromotion < %s | FileCheck %s
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Test that we only promote arguments when the caller/callee have compatible
; function attributes.

target triple = "x86_64-unknown-linux-gnu"

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}
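
; Both functions in the pair above use attribute set #0 (avx512vl,
; min-legal-vector-width=512, prefer-vector-width=512), so passing the
; 512-bit <8 x i64> by value is presumed ABI-compatible between caller
; and callee, and promotion is expected.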

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}
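
; In the three pairs above, min-legal-vector-width is 512 on both sides of
; each call; at most prefer-vector-width differs (#0 vs. #1). A preference
; mismatch alone is presumed not to change the ABI of a 512-bit by-value
; argument, so promotion is still expected.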

; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}
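
; In the two pairs above, caller and callee disagree on
; min-legal-vector-width (512 in #1 vs. 256 in #2) while avx512vl is
; available, so promoting the <8 x i64> to a by-value 512-bit argument
; could change the calling convention; argpromotion is expected to leave
; the pointer arguments alone, as the checks confirm.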

; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}
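
; The two avx2 pairs above also mix min-legal-vector-width 512 and 256
; (#3 vs. #4), but with only +avx2 the widest legal vector is 256 bits on
; both sides either way, so the mismatch is presumed not to affect the ABI
; and promotion proceeds.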

; If the arguments are scalar, it's OK to promote.
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, i32* %X
  %B = load i32, i32* %Y
  %C = add i32 %A, %B
  ret i32 %C
}

define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (i32* [[B:%.*]])
; CHECK-NEXT:    [[A:%.*]] = alloca i32
; CHECK-NEXT:    store i32 1, i32* [[A]]
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = alloca i32
  store i32 1, i32* %A
  %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B)
  ret i32 %C
}
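
; The function names above encode a mismatched vector-width combination,
; but the promoted arguments are scalar i32 values, which are presumed
; unaffected by min-legal-vector-width and prefer-vector-width; promotion
; is expected regardless.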

; If the arguments are scalar, it's OK to promote.
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, i32* %X
  %B = load i32, i32* %Y
  %C = add i32 %A, %B
  ret i32 %C
}

define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (i32* [[B:%.*]])
; CHECK-NEXT:    [[A:%.*]] = alloca i32
; CHECK-NEXT:    store i32 1, i32* [[A]]
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = alloca i32
  store i32 1, i32* %A
  %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B)
  ret i32 %C
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5

attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #5 = { argmemonly nounwind }