/*
 * Copyright 2010 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */

        .text
        .align 4
        .syntax unified

        .global arm_memset32
        .type   arm_memset32, %function
        .global arm_memset16
        .type   arm_memset16, %function

/*
 * Optimized memset functions for ARM.
 *
 * void arm_memset16(uint16_t* dst, uint16_t value, int count);
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 * In:    r0 = dst (assumed 2-byte aligned for arm_memset16 and
 *               4-byte aligned for arm_memset32 -- TODO confirm with callers)
 *        r1 = value to store, r2 = element count (NOT bytes)
 * Out:   nothing; r0-r3, ip, lr and flags clobbered (AAPCS-conformant)
 */
arm_memset16:
        .fnstart
        push    {lr}

        /* Abort if count <= 0.  CMP (rather than the original TEQ) is
         * required here: BLE branches on Z==1 || N!=V, but TEQ updates
         * only N and Z, leaving V stale from the caller -- so with TEQ
         * a negative count could slip past this early-out. */
        cmp     r2, #0
        ble     .Lfinish

        /* Multiply count by 2 - go from the number of 16-bit shorts
         * to the number of bytes desired. */
        mov     r2, r2, lsl #1

        /* Duplicate the 16-bit value into both halves of r1 so the
         * word-sized store paths below can be shared with arm_memset32. */
        orr     r1, r1, r1, lsl #16

        /* Align the destination to 32 bits, emitting one halfword if
         * dst sits on an odd 2-byte boundary. */
        tst     r0, #2
        strhne  r1, [r0], #2
        subne   r2, r2, #2

        /* Now jump into the main loop below. */
        b       .Lwork_32
        .fnend

arm_memset32:
        .fnstart
        push    {lr}

        /* Abort if count <= 0.  CMP, not TEQ, so that V is valid for
         * the signed BLE test (see comment in arm_memset16). */
        cmp     r2, #0
        ble     .Lfinish

        /* Multiply count by 4 - go from the number of 32-bit words to
         * the number of bytes desired. */
        mov     r2, r2, lsl #2

.Lwork_32:
        /* Replicate the fill word into ip and lr so the STMs below can
         * write 16 bytes per instruction.  Here r2 = remaining byte
         * count, always a multiple of 4, and r0 is 4-byte aligned. */
        mov     ip, r1
        mov     lr, r1

        /* Try to align the destination to a cache line.  Assume 32
         * byte (8 word) cache lines, it's the common case.
         * r3 = bytes (0-28, a word multiple) up to the next 32-byte
         * boundary, clamped to the total remaining count. */
        rsb     r3, r0, #0
        ands    r3, r3, #0x1C
        beq     .Laligned32
        cmp     r3, r2
        andhi   r3, r2, #0x1C
        sub     r2, r2, r3

        /* (Optionally) write any unaligned leading bytes.
         * (0-28 bytes, length in r3.)  Shifting r3 left by 28 moves
         * bit 4 (the 16-byte chunk) into C and bit 3 (the 8-byte
         * chunk) into N, so the conditional stores consume the length
         * two bits at a time: two 8-byte STMs if C, one if N. */
        movs    r3, r3, lsl #28
        stmiacs r0!, {r1, lr}
        stmiacs r0!, {r1, lr}
        stmiami r0!, {r1, lr}
        movs    r3, r3, lsl #2          /* bit 2 (4-byte chunk) -> C */
        strcs   r1, [r0], #4

        /* Now quickly loop through the cache-aligned data, one assumed
         * 32-byte cache line (two 16-byte STMs) per iteration. */
.Laligned32:
        mov     r3, r1
1:      subs    r2, r2, #32
        stmiahs r0!, {r1, r3, ip, lr}
        stmiahs r0!, {r1, r3, ip, lr}
        bhs     1b
        add     r2, r2, #32             /* undo the final over-subtract */

        /* (Optionally) store any remaining trailing bytes.
         * (0-30 bytes, length in r2.)  Same flag trick as the leading
         * path: after lsl #28, 16-byte chunk -> C and 8 -> N; after a
         * further lsl #2, 4 -> C and 2 -> N.  A 1-byte remainder can
         * never occur since both entry points work in 2-byte units. */
        movs    r2, r2, lsl #28
        stmiacs r0!, {r1, r3, ip, lr}
        stmiami r0!, {r1, lr}
        movs    r2, r2, lsl #2
        strcs   r1, [r0], #4
        strhmi  lr, [r0], #2

.Lfinish:
        pop     {pc}
        .fnend