; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly
;
; The module is compiled twice: once with the bulk-memory feature enabled
; (checked with the BULK-MEM prefix) and once with it disabled (checked with
; the NO-BULK-MEM prefix). Checks that hold for both runs use the common
; CHECK prefix.

; Pointers are 64 bits wide on wasm64, so the data layout must declare
; p:64:64 to agree with the triple below and with the i64 pointer operands
; expected by the checks in this file.
target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128"
target triple = "wasm64-unknown-unknown"

declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
declare void @llvm.memcpy.p0i32.p0i32.i64(i32*, i32*, i64, i1)

declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
declare void @llvm.memmove.p0i32.p0i32.i64(i32*, i32*, i64, i1)

declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
declare void @llvm.memset.p0i32.i64(i32*, i8, i64, i1)

; Variable i8 lengths: the length arrives as an i32 argument and is
; zero-extended to i64 before being handed to the bulk memory instruction.

; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i64, i32, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; Variable i64 lengths on i32* operands: the length is already pointer-sized
; and is passed through unchanged.

; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i64 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i64 %len) {
  call void @llvm.memmove.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i64, i32, i64) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i64 %len) {
  call void @llvm.memset.p0i32.i64(i32* %dest, i8 %val, i64 %len, i1 0)
  ret void
}

; Constant length of 1: the operation is expected to be expanded inline into
; a single byte load/store in both runs, so these use the common prefix.

; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
  ret void
}

; CHECK-LABEL: memset_1:
; CHECK-NEXT: .functype memset_1 (i64, i32) -> ()
; CHECK-NEXT: i32.store8 0($0), $1
; CHECK-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1, i1 0)
  ret void
}

; Constant length of 1024: with bulk memory enabled the length is
; materialized as an i64 constant and a single bulk instruction is emitted.

; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
  ret void
}

; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 100, i1 false)
  ret void
}

; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 100, i1 false)
  ret void
}

; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* %p, i8 %val, i64 100, i1 false)
  ret void
}