; Test memcpy using MVC.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
declare void @foo(i8*, i8*)

; A zero-length copy with an i32 length: neither argument register may
; appear before the return.
define void @f1(i8* %dest, i8* %src) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 0, i32 1, i1 false)
  ret void
}

; The same zero-length copy, this time with an i64 length.
define void @f2(i8* %dest, i8* %src) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 0, i32 1, i1 false)
  ret void
}

; A 1-byte copy with an i32 length: a single MVC of length 1 is expected.
define void @f3(i8* %dest, i8* %src) {
; CHECK-LABEL: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
  ret void
}

; A 1-byte copy with an i64 length.
define void @f4(i8* %dest, i8* %src) {
; CHECK-LABEL: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i32 1, i1 false)
  ret void
}

; 256 bytes is the largest copy expected to use a single MVC (i32 length).
define void @f5(i8* %dest, i8* %src) {
; CHECK-LABEL: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 256, i32 1, i1 false)
  ret void
}

; The 256-byte single-MVC case again, with an i64 length.
define void @f6(i8* %dest, i8* %src) {
; CHECK-LABEL: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 256, i32 1, i1 false)
  ret void
}

; Test the first case that needs two MVCs.
define void @f7(i8* %dest, i8* %src) {
; 257 bytes: one full 256-byte MVC followed by a 1-byte MVC at offset 256.
; CHECK-LABEL: f7:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(1,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 257, i32 1, i1 false)
  ret void
}

; 511 bytes is the last-but-one size that is expected to need two MVCs.
define void @f8(i8* %dest, i8* %src) {
; CHECK-LABEL: f8:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(255,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 511, i32 1, i1 false)
  ret void
}

; 512 bytes is the last size that is expected to need exactly two MVCs.
define void @f9(i8* %dest, i8* %src) {
; CHECK-LABEL: f9:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 512, i32 1, i1 false)
  ret void
}

; An arbitrary size (1279 bytes) handled by straight-line code: four
; 256-byte MVCs and a final 255-byte MVC.
define void @f10(i8* %dest, i8* %src) {
; CHECK-LABEL: f10:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(255,%r2), 1024(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1279, i32 1, i1 false)
  ret void
}

; ...and again in cases where not all parts are in range of MVC.
define void @f11(i8 *%destbase, i8 *%srcbase) {
; Copies 1279 bytes from %srcbase + 3500 to %destbase + 4000.  The first
; argument (%r2) is the destination base and the second (%r3) is the source
; base.  The expected code forms a new destination base with LAY once the
; destination offset reaches 4256, and a new source base once the source
; offset reaches 4268.
; CHECK-LABEL: f11:
; CHECK: mvc 4000(256,%r2), 3500(%r3)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
; CHECK: br %r14
  %dest = getelementptr i8 *%destbase, i64 4000
  %src = getelementptr i8 *%srcbase, i64 3500
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
                                       i1 false)
  ret void
}

; ...and again with a destination frame base that goes out of range.
; Both pointers address the same 6000-byte stack array (destination at
; index 3900, source at index 1924), and the copy sits between two calls
; to @foo.  Only the destination is expected to need a LAY-formed base.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 4076(256,%r15), 2100(%r15)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900
  %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924
  call void @foo(i8 *%dest, i8 *%src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
                                       i1 false)
  call void @foo(i8 *%dest, i8 *%src)
  ret void
}

; ...and again with a source frame base that goes out of range.
define void @f13() {
; Destination at index 24 and source at index 3650 of one 6000-byte stack
; array.  Only the source is expected to need a LAY-formed base.
; CHECK-LABEL: f13:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 200(256,%r15), 3826(%r15)
; CHECK: mvc 456(256,%r15), 4082(%r15)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8]* %arr, i64 0, i64 24
  %src = getelementptr [6000 x i8]* %arr, i64 0, i64 3650
  call void @foo(i8* %dest, i8* %src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1279, i32 1, i1 false)
  call void @foo(i8* %dest, i8* %src)
  ret void
}

; 1536 bytes (six 256-byte MVCs) is the last size done with straight-line
; code.
define void @f14(i8* %dest, i8* %src) {
; CHECK-LABEL: f14:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(256,%r2), 1024(%r3)
; CHECK: mvc 1280(256,%r2), 1280(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1536, i32 1, i1 false)
  ret void
}

; 1537 bytes is the first size done with a loop: a count of 6 is loaded,
; each iteration copies 256 bytes and advances both pointers, and a
; 1-byte MVC follows the loop.
define void @f15(i8* %dest, i8* %src) {
; CHECK-LABEL: f15:
; CHECK: lghi [[COUNT:%r[0-5]]], 6
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 768(%r2)
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: la %r2, 256(%r2)
; CHECK: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1537, i32 1, i1 false)
  ret void
}

; ...and again with frame bases, where the base must be loaded into a
; register before the loop.
define void @f16() {
; Source is the start of a 3200-byte stack array and destination is index
; 1600 of the same array.  A single base register is expected to serve
; both operands, with the destination addressed at displacement 1600.
; CHECK-LABEL: f16:
; CHECK: brasl %r14, foo@PLT
; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15)
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 2368([[BASE]])
; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
; CHECK: la [[BASE]], 256([[BASE]])
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [3200 x i8]
  %dest = getelementptr [3200 x i8]* %arr, i64 0, i64 1600
  %src = getelementptr [3200 x i8]* %arr, i64 0, i64 0
  call void @foo(i8* %dest, i8* %src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1537, i32 1, i1 false)
  call void @foo(i8* %dest, i8* %src)
  ret void
}