; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare i32 @sample_add(i32, i32)
declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext)
declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float)
declare void @test(i64)

; Scalar argument passing must not change (same tests as in VE/Scalar/call.ll, repeated below with +vpu)

define fastcc i32 @sample_call() {
; CHECK-LABEL: sample_call:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s0, sample_add@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, sample_add@hi(, %s0)
; CHECK-NEXT: or %s0, 1, (0)1
; CHECK-NEXT: or %s1, 2, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %r = tail call fastcc i32 @sample_add(i32 1, i32 2)
  ret i32 %r
}

define fastcc i32 @stack_call_int() {
; CHECK-LABEL: stack_call_int:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s0, 10, (0)1
; CHECK-NEXT: st %s0, 248(, %s11)
; CHECK-NEXT: or %s34, 9, (0)1
; CHECK-NEXT: lea %s0, stack_callee_int@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, stack_callee_int@hi(, %s0)
; CHECK-NEXT: or %s0, 1, (0)1
; CHECK-NEXT: or %s1, 2, (0)1
; CHECK-NEXT: or %s2, 3, (0)1
; CHECK-NEXT: or %s3, 4, (0)1
; CHECK-NEXT: or %s4, 5, (0)1
; CHECK-NEXT: or %s5, 6, (0)1
; CHECK-NEXT: or %s6, 7, (0)1
; CHECK-NEXT: or %s7, 8, (0)1
; CHECK-NEXT: st %s34, 240(, %s11)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %r = tail call fastcc i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
  ret i32 %r
}

define fastcc i32 @stack_call_int_szext() {
; CHECK-LABEL: stack_call_int_szext:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s0, -1, (0)1
; CHECK-NEXT: st %s0, 248(, %s11)
; CHECK-NEXT: lea %s34, 65535
; CHECK-NEXT: lea %s0, stack_callee_int_szext@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, stack_callee_int_szext@hi(, %s0)
; CHECK-NEXT: or %s0, -1, (0)1
; CHECK-NEXT: lea %s1, 255
; CHECK-NEXT: or %s2, 3, (0)1
; CHECK-NEXT: or %s3, 4, (0)1
; CHECK-NEXT: or %s4, 5, (0)1
; CHECK-NEXT: or %s5, 6, (0)1
; CHECK-NEXT: or %s6, 7, (0)1
; CHECK-NEXT: or %s7, 8, (0)1
; CHECK-NEXT: st %s34, 240(, %s11)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %r = tail call fastcc i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1)
  ret i32 %r
}

define fastcc float @stack_call_float() {
; CHECK-LABEL: stack_call_float:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea.sl %s0, 1092616192
; CHECK-NEXT: st %s0, 248(, %s11)
; CHECK-NEXT: lea.sl %s34, 1091567616
; CHECK-NEXT: lea %s0, stack_callee_float@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s0)
; CHECK-NEXT: lea.sl %s0, 1065353216
; CHECK-NEXT: lea.sl %s1, 1073741824
; CHECK-NEXT: lea.sl %s2, 1077936128
; CHECK-NEXT: lea.sl %s3, 1082130432
; CHECK-NEXT: lea.sl %s4, 1084227584
; CHECK-NEXT: lea.sl %s5, 1086324736
; CHECK-NEXT: lea.sl %s6, 1088421888
; CHECK-NEXT: lea.sl %s7, 1090519040
; CHECK-NEXT: st %s34, 240(, %s11)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %r = tail call fastcc float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
  ret float %r
}

define fastcc float @stack_call_float2(float %p0) {
; CHECK-LABEL: stack_call_float2:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s0, 248(, %s11)
; CHECK-NEXT: lea %s1, stack_callee_float@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s1)
; CHECK-NEXT: st %s0, 240(, %s11)
; CHECK-NEXT: or %s1, 0, %s0
; CHECK-NEXT: or %s2, 0, %s0
; CHECK-NEXT: or %s3, 0, %s0
; CHECK-NEXT: or %s4, 0, %s0
; CHECK-NEXT: or %s5, 0, %s0
; CHECK-NEXT: or %s6, 0, %s0
; CHECK-NEXT: or %s7, 0, %s0
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %r = tail call fastcc float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0)
  ret float %r
}

; Vector argument passing (fastcc feature)
;
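; Note: in the tests below, the <256 x i32> return value of get_v256i32 is
; expected in %v0 and vector arguments are expected in consecutive %v
; registers. A vector copy (lea/lvl to set VL to 256 followed by vor) should
; only appear when the same value has to occupy more than one vector argument
; register.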
declare fastcc <256 x i32> @get_v256i32()
declare fastcc void @vsample_v(<256 x i32>)
declare fastcc void @vsample_iv(i32, <256 x i32>)

define void @caller_vret() {
; CHECK-LABEL: caller_vret:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s0, get_v256i32@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %r = tail call fastcc <256 x i32> @get_v256i32()
  ret void
}

define void @caller_vret_pass_p0() {
; CHECK-LABEL: caller_vret_pass_p0:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: lea %s0, get_v256i32@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, vsample_v@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, vsample_v@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %p = tail call fastcc <256 x i32> @get_v256i32()
  call fastcc void @vsample_v(<256 x i32> %p)
  ret void
}

define void @caller_vret_pass_p1(i32 %s) {
; CHECK-LABEL: caller_vret_pass_p1:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: or %s18, 0, %s0
; CHECK-NEXT: lea %s0, get_v256i32@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, vsample_iv@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, vsample_iv@hi(, %s0)
; CHECK-NEXT: or %s0, 0, %s18
; CHECK-NEXT: bsic %s10, (, %s12)
  %p = tail call fastcc <256 x i32> @get_v256i32()
  call fastcc void @vsample_iv(i32 %s, <256 x i32> %p)
  ret void
}

declare fastcc void @vsample_vv(<256 x i32>, <256 x i32>)
declare fastcc void @vsample_vvv(<256 x i32>, <256 x i32>, <256 x i32>)

define void @caller_vret_pass_p01() {
; CHECK-LABEL: caller_vret_pass_p01:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s0, get_v256i32@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, vsample_vv@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, vsample_vv@hi(, %s0)
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v1, (0)1, %v0
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %p = tail call fastcc <256 x i32> @get_v256i32()
  call fastcc void @vsample_vv(<256 x i32> %p, <256 x i32> %p)
  ret void
}

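; Same value passed in three argument positions: two copies (into %v1 and %v2)
; are expected in addition to the returned value already sitting in %v0.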
define void @caller_vret_pass_p012() {
; CHECK-LABEL: caller_vret_pass_p012:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s0, get_v256i32@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, vsample_vvv@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, vsample_vvv@hi(, %s0)
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v1, (0)1, %v0
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v2, (0)1, %v0
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  %p = tail call fastcc <256 x i32> @get_v256i32()
  call fastcc void @vsample_vvv(<256 x i32> %p, <256 x i32> %p, <256 x i32> %p)
  ret void
}

; Expose register parameter mapping by forcing an explicit vreg move for all parameter positions
declare fastcc void @vsample_vvvvvvv(<256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>)

; TODO improve vreg copy (redundant lea+lvl emitted)
define fastcc void @roundtrip_caller_callee(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6) {
; CHECK-LABEL: roundtrip_caller_callee:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v7, (0)1, %v0
; CHECK-NEXT: lea %s0, vsample_vvvvvvv@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, vsample_vvvvvvv@hi(, %s0)
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v0, (0)1, %v1
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v1, (0)1, %v2
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v2, (0)1, %v3
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v3, (0)1, %v4
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v4, (0)1, %v5
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v5, (0)1, %v6
; CHECK-NEXT: lea %s16, 256
; CHECK-NEXT: lvl %s16
; CHECK-NEXT: vor %v6, (0)1, %v7
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
  call fastcc void @vsample_vvvvvvv(<256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6, <256 x i32> %p0)
  ret void
}