;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution.  The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php.
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;   Syntax: NASM (Intel operand order), 32-bit x86 code using SSE2 instructions.
;
;------------------------------------------------------------------------------

;
; Offsets of the stack arguments relative to ESP. They are valid only while
; exactly ESI and EDI are pushed on top of the return address
; (4-byte return address + 2 * 4-byte saved registers = 12).
;
%define ARG_DESTINATION     12
%define ARG_SOURCE          16
%define ARG_COUNT           20

;
; The bulk copy moves one double quadword (16 bytes) per iteration.
;
%define DQWORD_SIZE         16
%define DQWORD_ALIGN_MASK   (DQWORD_SIZE - 1)
%define DQWORD_SHIFT        4               ; log2 (DQWORD_SIZE)

    SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlapping buffers are handled: when Source lies below Destination and the
;  last source byte reaches into the destination, the copy runs from the last
;  byte down to the first; otherwise it runs forward.
;
;  The forward path copies single bytes until Destination is 16-byte aligned,
;  then copies 16-byte chunks with non-temporal stores, then copies the
;  remaining (< 16) bytes one at a time.
;
;  In:    all three arguments are read from the stack; the function returns
;         with a plain RET, so the arguments stay on the stack for the caller.
;  Out:   eax = Destination
;  Clobb: ecx, edx, flags
;  Keeps: esi, edi (pushed/popped), xmm0 (spilled to the stack while in use)
;  Exit:  direction flag is cleared (CLD) on every path
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    esi
    push    edi
    mov     esi, [esp + ARG_SOURCE]         ; esi <- Source
    mov     edi, [esp + ARG_DESTINATION]    ; edi <- Destination
    mov     edx, [esp + ARG_COUNT]          ; edx <- Count
    lea     eax, [esi + edx - 1]            ; eax <- address of last Source byte
    cmp     esi, edi
    jae     .copy_forward                   ; Source >= Destination: forward is safe
    cmp     eax, edi                        ; does Source's tail reach Destination?
    jae     .copy_backward                  ; yes: overlapped, copy from the end

.copy_forward:
    ;
    ; ecx <- (-Destination) mod 16 = bytes needed to reach a 16-byte boundary
    ;
    xor     ecx, ecx
    sub     ecx, edi
    and     ecx, DQWORD_ALIGN_MASK
    jz      .dst_aligned                    ; Destination is already aligned
    cmp     ecx, edx
    cmova   ecx, edx                        ; ecx <- min (ecx, Count), unsigned
    sub     edx, ecx                        ; edx <- remaining bytes to copy
    rep     movsb                           ; byte copy up to the boundary

.dst_aligned:
    mov     ecx, edx
    and     edx, DQWORD_ALIGN_MASK          ; edx <- tail bytes after the chunks
    shr     ecx, DQWORD_SHIFT               ; ecx <- # of DQwords to copy
    jz      .copy_bytes                     ; fewer than 16 bytes left
    sub     esp, DQWORD_SIZE                ; scratch slot for the caller's xmm0
    movdqu  [esp], xmm0                     ; save xmm0 (unaligned-safe store)

    ;
    ; Invariant: ecx > 0 on entry, so the decrement-and-branch form below runs
    ; the body exactly ecx times, as the LOOP instruction would.
    ;
.copy_dqwords:
    movdqu  xmm0, [esi]                     ; esi may not be 16-byte aligned
    movntdq [edi], xmm0                     ; edi is 16-byte aligned here
    add     esi, DQWORD_SIZE
    add     edi, DQWORD_SIZE
    dec     ecx
    jnz     .copy_dqwords

    mfence                                  ; order the non-temporal stores
    movdqu  xmm0, [esp]                     ; restore xmm0
    add     esp, DQWORD_SIZE                ; stack cleanup
    jmp     .copy_bytes

.copy_backward:
    mov     esi, eax                        ; esi <- last byte in Source
    lea     edi, [edi + edx - 1]            ; edi <- last byte in Destination
    std                                     ; make MOVSB walk downwards

.copy_bytes:
    mov     ecx, edx                        ; forward: tail bytes; backward: Count
    rep     movsb
    cld                                     ; always leave the direction flag clear
    mov     eax, [esp + ARG_DESTINATION]    ; eax <- Destination as return value
    pop     edi
    pop     esi
    ret
