#!/usr/bin/env ruby
# Copyright (c) 2023-2024 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Wrapper over the library function to avoid
# the prologue bloating in the caller.
# Forwards (dst, src, size) to "memmove" and returns nothing; the pointer
# result of the call is discarded.
function(:MemmoveInterposer,
         params: {dst: 'ptr', src: 'ptr', size: 'u32'},
         mode: [:FastPath]) {
    Call(dst, src, size).Method("memmove").ptr
    ReturnVoid().void
}

# The dedicated register mask below is currently disabled; $panda_mask is
# used unconditionally.
#if Options.arch_64_bits?
#    $regmask = RegMask.new($full_regmap, :tmp0, :tmp1, :arg0, :arg1, :arg2, :arg3, :arg4)
#else
    $regmask = $panda_mask
#end

# Copies `len` bytes from `src_data` to `dst_data`, choosing the copy
# direction from the relative position of the two pointers.
#
# inputs:
#   dst_data - destination pointer
#   src_data - source pointer
#   len      - length in bytes
#
# Paths:
#   * len > 4096      : tail call to MemmoveInterposer.
#   * len >= 64       : loop moving 48 bytes (six u64) per iteration, plus
#                       one 48-byte block anchored at the far end of the range.
#   * otherwise       : loop moving one u64 per iteration, plus one u64
#                       anchored at the far end of the range.
#
# NOTE(review): every path issues 8-byte loads/stores at offset 0 and at
# offset len - 8, so len is presumably expected to be >= 8 - confirm
# against the callers.
function(:TryBigCopy,
         params: {dst_data: 'ptr', src_data: 'ptr', len: 'u32'},
         regalloc_set: $regmask,
         regmap: $full_regmap,
         mode: [:FastPath]) {
    # Arm32 is not supported
    if Options.arch == :arm32
        Intrinsic(:UNREACHABLE).void.Terminator
        next
    end

    len_bytes := len
    len_word := Cast(len_bytes).word

    # if bytes size > 4096 bytes, copy via system routine
    compare := Compare(len_bytes, 4096).GT.b
    IfImm(compare).Imm(0).NE.Unlikely.b {
        # Pin the three arguments to arg0..arg2 and tail-call the interposer.
        LiveOut(dst_data).DstReg(regmap[:arg0]).ptr
        LiveOut(src_data).DstReg(regmap[:arg1]).ptr
        LiveOut(len_bytes).DstReg(regmap[:arg2]).u32
        ep_offset = get_entrypoint_offset("MEMMOVE_INTERPOSER")
        Intrinsic(:TAIL_CALL).AddImm(ep_offset).MethodAsImm("MemmoveInterposer").Terminator.void
        Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG
    }

    # Direction selection: copy forward when src_data is above dst_data
    # (CC_A - presumably the unsigned "above" condition), or when dst_data
    # is at least len bytes past src_data; otherwise copy backward.
    If(src_data, dst_data).CC(:CC_A).b {
        Goto(:ForwardCopy)
    }
    gap := Sub(dst_data, src_data).word
    If(gap, len_word).AE.b {
        Goto(:ForwardCopy)
    }

    # BackwardCopy
    compare := Compare(len_bytes, 64).GE.b
    IfImm(compare).Imm(0).NE.b {
        Goto(:BackwardCopy64)
    }
    # Start from the last 8 bytes; each word is loaded one step before it
    # is stored.
    idx := Sub(len_bytes, 8).i32
    buf := Load(src_data, idx).u64
Label(:BackwardCopyLoop)
    idx := Phi(idx, idx_prev).i32
    buf := Phi(buf, buf_prev).u64
    compare := Compare(idx, 8).LE.b
    IfImm(compare).Imm(0).NE.Unlikely.b {
        Goto(:BackwardCopyTail)
    }
    Store(dst_data, idx, buf).u64
    idx_prev := Sub(idx, 8).i32
    buf_prev := Load(src_data, idx_prev).u64
    Goto(:BackwardCopyLoop)
Label(:BackwardCopyTail)
    # The first source word is read before the pending word is written,
    # then both are stored (the two stores may overlap).
    buf1 := LoadI(src_data).Imm(0).u64
    Store(dst_data, idx, buf).u64
    StoreI(dst_data, buf1).Imm(0).u64
    Goto(:End)

Label(:BackwardCopy64)
    # This is naive x6 loop unroll for 64+ bytes
    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
    # The first 48 source bytes are read up front and written last, in
    # BackwardCopyTail64.
    first_buf0 := LoadI(src_data).Imm(0).u64
    first_buf1 := LoadI(src_data).Imm(8).u64
    first_buf2 := LoadI(src_data).Imm(16).u64
    first_buf3 := LoadI(src_data).Imm(24).u64
    first_buf4 := LoadI(src_data).Imm(32).u64
    first_buf5 := LoadI(src_data).Imm(40).u64
    # Both cursors start at the last 48-byte block (offset len - 48).
    len_tail := SubI(len_word).Imm(48).word
    src_ptr_init := Add(src_data, len_tail).ptr
    dst_ptr_init := Add(dst_data, len_tail).ptr
Label(:BackwardCopyLoop64)
    src_ptr := Phi(src_ptr_init, prev_src_ptr).ptr
    dst_ptr := Phi(dst_ptr_init, prev_dst_ptr).ptr
    # All six loads of the block are issued before any of its stores.
    buf5 := LoadI(src_ptr).Imm(40).u64
    buf4 := LoadI(src_ptr).Imm(32).u64
    buf3 := LoadI(src_ptr).Imm(24).u64
    buf2 := LoadI(src_ptr).Imm(16).u64
    buf1 := LoadI(src_ptr).Imm(8).u64
    buf0 := LoadI(src_ptr).Imm(0).u64
    StoreI(dst_ptr, buf5).Imm(40).u64
    StoreI(dst_ptr, buf4).Imm(32).u64
    StoreI(dst_ptr, buf3).Imm(24).u64
    StoreI(dst_ptr, buf2).Imm(16).u64
    StoreI(dst_ptr, buf1).Imm(8).u64
    StoreI(dst_ptr, buf0).Imm(0).u64
    prev_src_ptr := SubI(src_ptr).Imm(48).ptr
    prev_dst_ptr := SubI(dst_ptr).Imm(48).ptr
    # Leave the loop once the next block would start before src_data
    # (CC_B - presumably the unsigned "below" condition).
    If(prev_src_ptr, src_data).CC(:CC_B).Unlikely.b {
        Goto(:BackwardCopyTail64)
    } Else {
        Goto(:BackwardCopyLoop64)
    }
Label(:BackwardCopyTail64)
    # Write the 48 bytes that were read from the start of the source.
    StoreI(dst_data, first_buf5).Imm(40).u64
    StoreI(dst_data, first_buf4).Imm(32).u64
    StoreI(dst_data, first_buf3).Imm(24).u64
    StoreI(dst_data, first_buf2).Imm(16).u64
    StoreI(dst_data, first_buf1).Imm(8).u64
    StoreI(dst_data, first_buf0).Imm(0).u64
    Goto(:End)

Label(:ForwardCopy)
    compare := Compare(len_bytes, 64).GE.b
    IfImm(compare).Imm(0).NE.b {
        Goto(:ForwardCopy64)
    }
    # ForwardCopy
    # len_bytes is rebound to len - 16 and serves as the loop bound; each
    # word is loaded one step before it is stored.
    len_bytes := Sub(len_bytes, 16).i32
    idx := 0
    buf := Load(src_data, idx).u64
Label(:ForwardCopyLoop)
    idx := Phi(idx, idx_next).i32
    buf := Phi(buf, buf_next).u64
    If(idx, len_bytes).GE.Unlikely.b {
        Goto(:ForwardCopyTail)
    }
    Store(dst_data, idx, buf).u64
    idx_next := Add(idx, 8).i32
    buf_next := Load(src_data, idx_next).u64
    Goto(:ForwardCopyLoop)
Label(:ForwardCopyTail)
    # len_bytes becomes len - 8: the offset of the last 8 bytes. That word
    # is read before the pending word is written, then both are stored
    # (the two stores may overlap).
    len_bytes := Add(len_bytes, 8).i32
    buf1 := Load(src_data, len_bytes).u64
    Store(dst_data, idx, buf).u64
    Store(dst_data, len_bytes, buf1).u64
    Goto(:End)

Label(:ForwardCopy64)
    # This is naive x6 loop unroll for 64+ bytes
    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
    # The last 48 source bytes (offset len - 48) are read up front and
    # written last, in ForwardCopyTail64.
    len_tail := SubI(len_word).Imm(48).word
    src_end_data := Add(src_data, len_tail).ptr
    last_buf0 := LoadI(src_end_data).Imm(0).u64
    last_buf1 := LoadI(src_end_data).Imm(8).u64
    last_buf2 := LoadI(src_end_data).Imm(16).u64
    last_buf3 := LoadI(src_end_data).Imm(24).u64
    last_buf4 := LoadI(src_end_data).Imm(32).u64
    last_buf5 := LoadI(src_end_data).Imm(40).u64
Label(:ForwardCopyLoop64)
    src_ptr := Phi(src_data, next_src_ptr).ptr
    dst_ptr := Phi(dst_data, next_dst_ptr).ptr
    # All six loads of the block are issued before any of its stores.
    buf0 := LoadI(src_ptr).Imm(0).u64
    buf1 := LoadI(src_ptr).Imm(8).u64
    buf2 := LoadI(src_ptr).Imm(16).u64
    buf3 := LoadI(src_ptr).Imm(24).u64
    buf4 := LoadI(src_ptr).Imm(32).u64
    buf5 := LoadI(src_ptr).Imm(40).u64
    StoreI(dst_ptr, buf0).Imm(0).u64
    StoreI(dst_ptr, buf1).Imm(8).u64
    StoreI(dst_ptr, buf2).Imm(16).u64
    StoreI(dst_ptr, buf3).Imm(24).u64
    StoreI(dst_ptr, buf4).Imm(32).u64
    StoreI(dst_ptr, buf5).Imm(40).u64
    next_src_ptr := AddI(src_ptr).Imm(48).ptr
    next_dst_ptr := AddI(dst_ptr).Imm(48).ptr
    # Leave the loop once the next block would start at or past the
    # pre-loaded final block.
    If(next_src_ptr, src_end_data).AE.Unlikely.b {
        Goto(:ForwardCopyTail64)
    } Else {
        Goto(:ForwardCopyLoop64)
    }
Label(:ForwardCopyTail64)
    # Write the 48 bytes that were read from the end of the source.
    dst_end_data := Add(dst_data, len_tail).ptr
    StoreI(dst_end_data, last_buf0).Imm(0).u64
    StoreI(dst_end_data, last_buf1).Imm(8).u64
    StoreI(dst_end_data, last_buf2).Imm(16).u64
    StoreI(dst_end_data, last_buf3).Imm(24).u64
    StoreI(dst_end_data, last_buf4).Imm(32).u64
    StoreI(dst_end_data, last_buf5).Imm(40).u64

### End copy
Label(:End)
    ReturnVoid().void
}