/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

    // Save the registers that every exit path restores: r0 (so the dst
    // pointer passed in can be reloaded as the return value), r4 and r5
    // (used as scratch below) and lr (also used as scratch; its saved
    // value is later popped straight into pc).
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

    // Return from strcpy. \inst is pop, or a conditional form such as
    // popne/popcs/popeq when used as the last instruction of an IT block.
    // Loading pc performs the return; r0, r4 and r5 get the values saved
    // by m_push.
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret

    // Copy one byte from [r1] to [r0], post-incrementing both pointers,
    // then apply \cmd (cbz or cbnz) to the copied byte with target \label.
    // \reg must be a register cbz/cbnz can encode.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

// strcpy: copy the NUL-terminated byte string at r1 to r0, including the
// terminator.
//
// In:      r0 = destination pointer, r1 = source pointer.
// Out:     r0 = the destination pointer passed in (pushed on entry and
//          reloaded by every m_ret).
// Scratch: r1, r2, r3, ip, lr and the condition flags are overwritten.
//          r4 and r5 are overwritten too but restored on return.
// Stack:   16 bytes (four registers) for the duration of the call.
//
// Word-sized NUL detection used throughout: for a loaded word x,
//     (x - 0x01010101) & ~x & 0x80808080
// is non-zero when x contains a zero byte, and the lowest set flag bit
// (bit 7, 15, 23 or 31) identifies the first such byte. The code treats the
// least significant byte of a word as the first string byte, i.e. it
// assumes little-endian data accesses.
ENTRY(strcpy)
    // Unroll the first 8 bytes that will be copied.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    // The eighth byte uses cbnz: a non-zero byte continues with the bulk
    // copy, a zero byte falls through to the return below.
    m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue

strcpy_finish:
    m_ret   inst=pop

strcpy_continue:
    pld     [r1, #0]
    // r3 = dst & 7; non-zero means dst still has to be brought up to a
    // double word boundary.
    ands    r3, r0, #7
    bne     strcpy_align_dst

strcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     strcpy_unaligned_copy

    .p2align 2
strcpy_mainloop:
    // Both pointers are double word aligned: move 8 bytes per iteration.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    // NUL test on the first word (see the function header).
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // NUL test on the second word.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_mainloop

strcpy_zero_in_first_register:
    // r2 holds the word containing the terminator and ip its flag bits.
    // Shifting ip left by 17 leaves the byte 0 flag in the result (NE when
    // byte 0 is the terminator) and moves the byte 1 flag into the carry.
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]                // Terminator is byte 0: store 1 byte.
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]                // Terminator is byte 1: store 2 bytes.
    m_ret   inst=popcs
    // Bytes 0 and 1 are non-zero here. Shifting left by one drops the byte 3
    // flag, so the result is zero (EQ) unless byte 2 is the terminator.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]                // Terminator is byte 3: store the word.
    m_ret   inst=popeq
    // Terminator is byte 2: store a halfword followed by the zero byte.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_zero_in_second_register:
    // Same decoding as above, applied to r3. r2 is a complete word without
    // a terminator and is always stored first.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]                // Terminator is byte 0 of r3.
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]                // Terminator is byte 1 of r3.
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}            // Terminator is byte 3 of r3.
    m_ret   inst=popeq
    // Terminator is byte 2 of r3.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

strcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 = 8 - (dst & 7) = number of bytes (1..7) needed to align dst.
    rsb     r3, r3, #8
    // Move bit 0 of the count into bit 31 of the result (EQ when clear) and
    // bit 1 into the carry. The byte copy below leaves the flags untouched,
    // so the carry is still valid at strcpy_align_to_32.
    lsls    ip, r3, #31
    beq     strcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete

strcpy_align_to_32:
    // Carry clear: bit 1 of the count is clear, no two-byte step needed.
    bcc     strcpy_align_to_64

    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

strcpy_align_to_64:
    tst     r3, #4
    beq     strcpy_check_src_align

    // Copy the four remaining alignment bytes one at a time. Only the
    // alignment of dst is known here, so a word load from src could extend
    // up to three bytes past the terminator and, when those bytes fall in
    // the following page, access memory that is not part of the string.
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r5, cmd=cbz, label=strcpy_complete
    b       strcpy_check_src_align

strcpy_complete:
    m_ret   inst=pop

strcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7 on this path). Each table entry is the distance
    // from the table to its target in halfwords; entry 0 is a placeholder
    // because r3 is never zero here.
    tbb     [pc, r3]
strcpy_unaligned_branchtable:
    .byte 0
    .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Probe the three bytes up to the boundary individually before the
    // word load below, which also reads one byte beyond it.
    ldrb    r3, [r1]
    cbz     r3, strcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, strcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, strcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0..2 of r3 were probed above, so only byte 3 can be the
    // terminator. stmia leaves the flags set by lsrs untouched.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     strcpy_unalign_return
    b       strcpy_unalign7

strcpy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
strcpy_unalign_return:
    m_ret   inst=pop

strcpy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

strcpy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Probe the two bytes up to the boundary before the next word load.
    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, strcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were probed above; check byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     strcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     strcpy_unalign_return
    b       strcpy_unalign6

strcpy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Probe the single byte up to the boundary before the next word load.
    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign5

strcpy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

strcpy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
    // src is word aligned on this path, so each word is loaded and tested
    // separately with no byte probes.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
    // Probe the three bytes up to the boundary before any word load.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, strcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 of r2 were probed above; only byte 3 is still unchecked.
    lsrs    lr, r2, #24
    beq     strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign3

strcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
    // Probe the two bytes up to the boundary before any word load.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were probed above; check byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     strcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
    // Probe the single byte up to the boundary before any word load.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign1

strcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign_copy4bytes:
    stmia   r0, {r2}
    m_ret   inst=pop
END(strcpy)