#!/usr/bin/env ruby

# Copyright (c) 2023 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Wrapper over the library function `memmove`, kept as a separate function
# to avoid bloating the prologue of the caller.
# inputs:
#   dst, src       - destination and source pointers
#   size           - number of bytes, forwarded to memmove as is
#   dummy1, dummy2 - not referenced in the body
# NOTE(review): the parameter list mirrors TryBigCopy below, presumably so
# that TryBigCopy can tail-call this entrypoint with its own arguments
# still in place - confirm.
function(:MemmoveInterposer,
          params: {dst: 'ptr', src: 'ptr', size: 'u32', dummy1: 'u32', dummy2: 'u32'},
          mode: [:FastPath]) {
    Call(dst, src, size).Method("memmove").void
    ReturnVoid().void
}

# Register mask used by TryBigCopy. The arch-specific variant is disabled
# (left commented out); $panda_mask is used unconditionally.
#if Options.arch_64_bits?
#    $regmask = RegMask.new($full_regmap, :tmp0, :tmp1, :arg0, :arg1, :arg2, :arg3, :arg4)
#else
    $regmask = $panda_mask
#end

# Copies `len` bytes from src_data to dst_data; the copy direction is chosen
# from the relative position of the two regions, so they may overlap.
# inputs:
#   dst_data, src_data - destination and source pointers
#   len                - length in bytes
#   dummy1, dummy2     - dummies, not referenced in the body
# Strategy:
#   len > 4096 - tail call to the MEMMOVE_INTERPOSER entrypoint
#   len >= 64  - 48-byte unrolled loop plus a 48-byte edge block
#   otherwise  - 8-byte word loop plus one edge word
# NOTE(review): every path accesses whole 8-byte words at offsets 0 and
# len - 8, so callers presumably guarantee len >= 8 - confirm.
function(:TryBigCopy,
          params: {dst_data: 'ptr', src_data: 'ptr', len: 'u32', dummy1: 'u32', dummy2: 'u32'},
          regalloc_set: $regmask,
          regmap: $full_regmap,
          mode: [:FastPath]) {

    # Arm32 is not supported
    if Options.arch == :arm32
        Intrinsic(:UNREACHABLE).void.Terminator
        # leave the builder block early: nothing else is emitted for arm32
        next
    end

    len_bytes := len
    # word-sized copy of the length, used for pointer arithmetic and
    # for the comparison with the pointer gap
    len_word := Cast(len_bytes).word

    # if bytes size > 4096 bytes, copy via system routine
    compare := Compare(len_bytes, 4096).GT.b
    IfImm(compare).Imm(0).NE.Unlikely.b {
        # pin dst, src and len to the first three argument registers,
        # then tail-call the interposer entrypoint
        LiveOut(dst_data).DstReg(regmap[:arg0]).ptr
        LiveOut(src_data).DstReg(regmap[:arg1]).ptr
        LiveOut(len_bytes).DstReg(regmap[:arg2]).u32
        entrypoint_id = get_entrypoint_offset("MEMMOVE_INTERPOSER")
        Intrinsic(:TAIL_CALL).AddImm(entrypoint_id).Terminator.void
        # emitted only when defines.DEBUG is set
        Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG
    }

    # src is above dst (unsigned compare): copy forward
    If(src_data, dst_data).CC(:CC_A).b {
        Goto(:ForwardCopy)
    }

    # dst is not below src here; if the distance between them is at least
    # len (unsigned compare), copy forward as well
    gap := Sub(dst_data, src_data).word
    If(gap, len_word).AE.b {
        Goto(:ForwardCopy)
    }

    # BackwardCopy
    compare := Compare(len_bytes, 64).GE.b
    IfImm(compare).Imm(0).NE.b {
        Goto(:BackwardCopy64)
    }

    # Short backward copy: start from the last 8-byte word (offset len - 8).
    # `buf` always holds the word already loaded from src at `idx`; it is
    # stored before the next (lower) word is loaded.
    idx := Sub(len_bytes, 8).i32
    buf := Load(src_data, idx).u64

Label(:BackwardCopyLoop)
    idx := Phi(idx, idx_prev).i32
    buf := Phi(buf, buf_prev).u64
    # stop once the pending word is within 8 bytes of the start;
    # the tail writes it together with the first word
    compare := Compare(idx, 8).LE.b
    IfImm(compare).Imm(0).NE.Unlikely.b {
        Goto(:BackwardCopyTail)
    }
    Store(dst_data, idx, buf).u64
    idx_prev := Sub(idx, 8).i32
    buf_prev := Load(src_data, idx_prev).u64
    Goto(:BackwardCopyLoop)

Label(:BackwardCopyTail)
    # the first source word is loaded before the two final stores
    buf1 := LoadI(src_data).Imm(0).u64
    Store(dst_data, idx, buf).u64
    StoreI(dst_data, buf1).Imm(0).u64
    Goto(:End)

Label(:BackwardCopy64)
    # This is naive x6 loop unroll for 64+ bytes
    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
    # The first 48 bytes of the source are loaded up front and written
    # last, in BackwardCopyTail64.
    first_buf0 := LoadI(src_data).Imm(0).u64
    first_buf1 := LoadI(src_data).Imm(8).u64
    first_buf2 := LoadI(src_data).Imm(16).u64
    first_buf3 := LoadI(src_data).Imm(24).u64
    first_buf4 := LoadI(src_data).Imm(32).u64
    first_buf5 := LoadI(src_data).Imm(40).u64
    # the loop starts at the last 48-byte chunk: offset len - 48
    len_tail := SubI(len_word).Imm(48).word
    src_ptr_init := Add(src_data, len_tail).ptr
    dst_ptr_init := Add(dst_data, len_tail).ptr

Label(:BackwardCopyLoop64)
    src_ptr := Phi(src_ptr_init, prev_src_ptr).ptr
    dst_ptr := Phi(dst_ptr_init, prev_dst_ptr).ptr

    # all six words of the chunk are loaded before any of them is stored
    buf5 := LoadI(src_ptr).Imm(40).u64
    buf4 := LoadI(src_ptr).Imm(32).u64
    buf3 := LoadI(src_ptr).Imm(24).u64
    buf2 := LoadI(src_ptr).Imm(16).u64
    buf1 := LoadI(src_ptr).Imm(8).u64
    buf0 := LoadI(src_ptr).Imm(0).u64

    StoreI(dst_ptr, buf5).Imm(40).u64
    StoreI(dst_ptr, buf4).Imm(32).u64
    StoreI(dst_ptr, buf3).Imm(24).u64
    StoreI(dst_ptr, buf2).Imm(16).u64
    StoreI(dst_ptr, buf1).Imm(8).u64
    StoreI(dst_ptr, buf0).Imm(0).u64

    # step 48 bytes down; leave the loop once the next chunk would start
    # below the beginning of the source
    prev_src_ptr := SubI(src_ptr).Imm(48).ptr
    prev_dst_ptr := SubI(dst_ptr).Imm(48).ptr
    If(prev_src_ptr, src_data).CC(:CC_B).Unlikely.b {
        Goto(:BackwardCopyTail64)
    } Else {
        Goto(:BackwardCopyLoop64)
    }

Label(:BackwardCopyTail64)
    # write the 48 bytes saved from the start of the source
    StoreI(dst_data, first_buf5).Imm(40).u64
    StoreI(dst_data, first_buf4).Imm(32).u64
    StoreI(dst_data, first_buf3).Imm(24).u64
    StoreI(dst_data, first_buf2).Imm(16).u64
    StoreI(dst_data, first_buf1).Imm(8).u64
    StoreI(dst_data, first_buf0).Imm(0).u64
    Goto(:End)

Label(:ForwardCopy)
    compare := Compare(len_bytes, 64).GE.b
    IfImm(compare).Imm(0).NE.b {
        Goto(:ForwardCopy64)
    }

    # ForwardCopy
    # Short forward copy: len_bytes is rebound to len - 16 and serves as
    # the loop bound. `buf` always holds the word already loaded from src
    # at `idx`.
    len_bytes := Sub(len_bytes, 16).i32
    idx := 0
    buf := Load(src_data, idx).u64

Label(:ForwardCopyLoop)
    idx := Phi(idx, idx_next).i32
    buf := Phi(buf, buf_next).u64
    If(idx, len_bytes).GE.Unlikely.b {
        Goto(:ForwardCopyTail)
    }
    Store(dst_data, idx, buf).u64
    idx_next := Add(idx, 8).i32
    buf_next := Load(src_data, idx_next).u64
    Goto(:ForwardCopyLoop)

Label(:ForwardCopyTail)
    # len_bytes becomes len - 8: the offset of the last 8-byte word.
    # The last source word is loaded before the two final stores.
    len_bytes := Add(len_bytes, 8).i32
    buf1 := Load(src_data, len_bytes).u64
    Store(dst_data, idx, buf).u64
    Store(dst_data, len_bytes, buf1).u64

    Goto(:End)

Label(:ForwardCopy64)
    # This is naive x6 loop unroll for 64+ bytes
    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
    # The last 48 bytes of the source (offset len - 48) are loaded up front
    # and written last, in ForwardCopyTail64.
    len_tail := SubI(len_word).Imm(48).word
    src_end_data := Add(src_data, len_tail).ptr
    last_buf0 := LoadI(src_end_data).Imm(0).u64
    last_buf1 := LoadI(src_end_data).Imm(8).u64
    last_buf2 := LoadI(src_end_data).Imm(16).u64
    last_buf3 := LoadI(src_end_data).Imm(24).u64
    last_buf4 := LoadI(src_end_data).Imm(32).u64
    last_buf5 := LoadI(src_end_data).Imm(40).u64

Label(:ForwardCopyLoop64)
    src_ptr := Phi(src_data, next_src_ptr).ptr
    dst_ptr := Phi(dst_data, next_dst_ptr).ptr

    # all six words of the chunk are loaded before any of them is stored
    buf0 := LoadI(src_ptr).Imm(0).u64
    buf1 := LoadI(src_ptr).Imm(8).u64
    buf2 := LoadI(src_ptr).Imm(16).u64
    buf3 := LoadI(src_ptr).Imm(24).u64
    buf4 := LoadI(src_ptr).Imm(32).u64
    buf5 := LoadI(src_ptr).Imm(40).u64

    StoreI(dst_ptr, buf0).Imm(0).u64
    StoreI(dst_ptr, buf1).Imm(8).u64
    StoreI(dst_ptr, buf2).Imm(16).u64
    StoreI(dst_ptr, buf3).Imm(24).u64
    StoreI(dst_ptr, buf4).Imm(32).u64
    StoreI(dst_ptr, buf5).Imm(40).u64

    # step 48 bytes up; leave the loop once the next chunk would start at
    # or beyond the saved last 48-byte block
    next_src_ptr := AddI(src_ptr).Imm(48).ptr
    next_dst_ptr := AddI(dst_ptr).Imm(48).ptr
    If(next_src_ptr, src_end_data).AE.Unlikely.b {
        Goto(:ForwardCopyTail64)
    } Else {
        Goto(:ForwardCopyLoop64)
    }

Label(:ForwardCopyTail64)
    # write the saved last 48 bytes at dst + (len - 48)
    dst_end_data := Add(dst_data, len_tail).ptr

    StoreI(dst_end_data, last_buf0).Imm(0).u64
    StoreI(dst_end_data, last_buf1).Imm(8).u64
    StoreI(dst_end_data, last_buf2).Imm(16).u64
    StoreI(dst_end_data, last_buf3).Imm(24).u64
    StoreI(dst_end_data, last_buf4).Imm(32).u64
    StoreI(dst_end_data, last_buf5).Imm(40).u64
    ### End copy

Label(:End)
    ReturnVoid().void
}