• Home
  • Raw
  • Download

Lines Matching +full:mod +full:- +full:12 +full:b

2  * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
9 * Copyright (C) 2002 - 2012 Tensilica Inc.
24 * 32-bit load and store instructions (as required for these
39 * This code tries to use fall-through branches for the common
64 .byte 0 # 1 mod 4 alignment for LOOPNEZ
65 # (0 mod 4 alignment for LBEG)
95 addi a4, a4, -1
100 .Ldst2mod4: # dst 16-bit aligned
106 addi a4, a4, -2
119 _bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
120 _bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
121 .Ldstaligned: # return here from .Ldst?mod? once dst is aligned
122 srli a7, a4, 4 # number of loop iterations with 16B
127 * Destination and source are word-aligned, use word copy.
129 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
135 add a8, a8, a3 # a8 = end of last 16B source chunk
143 l32i a7, a3, 12
146 s32i a7, a5, 12
194 _beqz a4, .Ldone # avoid loading anything for zero-length copies
195 # copy 16 bytes per iteration for word-aligned dst and unaligned src
211 add a10, a10, a3 # a10 = end of last 16B source chunk
218 l32i a9, a3, 12
226 s32i a9, a5, 12
300 * 32-bit load and store instructions (as required for these
330 .byte 0 # 1 mod 4 alignment for LOOPNEZ
331 # (0 mod 4 alignment for LBEG)
340 addi a3, a3, -1
342 addi a5, a5, -1
360 addi a3, a3, -1
362 addi a5, a5, -1
364 addi a4, a4, -1
367 .Lbackdst2mod4: # dst 16-bit aligned
370 addi a3, a3, -2
373 addi a5, a5, -2
376 addi a4, a4, -2
393 _bbsi.l a5, 0, .Lbackdst1mod2 # if dst is 1 mod 2
394 _bbsi.l a5, 1, .Lbackdst2mod4 # if dst is 2 mod 4
395 .Lbackdstaligned: # return here from .Lbackdst?mod? once dst is aligned
396 srli a7, a4, 4 # number of loop iterations with 16B
401 * Destination and source are word-aligned, use word copy.
403 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
409 sub a8, a3, a8 # a8 = start of first 16B source chunk
412 addi a3, a3, -16
413 l32i a7, a3, 12
415 addi a5, a5, -16
416 s32i a7, a5, 12
428 addi a3, a3, -8
431 addi a5, a5, -8
441 addi a3, a3, -4
443 addi a5, a5, -4
450 addi a3, a3, -2
452 addi a5, a5, -2
458 addi a3, a3, -1
460 addi a5, a5, -1
470 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies
471 # copy 16 bytes per iteration for word-aligned dst and unaligned src
486 sub a10, a3, a10 # a10 = start of first 16B source chunk
489 addi a3, a3, -16
490 l32i a7, a3, 12
492 addi a5, a5, -16
494 s32i a6, a5, 12
509 addi a3, a3, -8
512 addi a5, a5, -8
521 addi a3, a3, -4
523 addi a5, a5, -4
537 addi a3, a3, -2
540 addi a5, a5, -2
547 addi a3, a3, -1
548 addi a5, a5, -1