//
// Copyright (c) 2013 - 2016, Linaro Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of the Linaro nor the
//       names of its contributors may be used to endorse or promote products
//       derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

// Parameters and result.
#define src1      r0    // In:  first buffer
#define src2      r1    // In:  second buffer
#define limit     r2    // In:  number of bytes to compare
#define result    r0    // Out: comparison result (shares r0 with src1)

// Internal variables.
#define data1     r3    // Current word/byte loaded from src1
#define data2     r4    // Current word/byte loaded from src2
#define limit_wd  r5    // Remaining 32-bit words in the word-wise path
#define diff      r6    // data1 ^ data2; non-zero bits mark differences
#define tmp1      r7    // Scratch: alignment test / pre-alignment bit count
#define tmp2      r12   // Scratch: mask for bytes preceding the start
#define pos       r8    // Leading-zero count of the byte-reversed diff
#define mask      r14   // Mask for bytes past the limit (r14 is lr; it is
                        // saved by the prologue and reloaded into pc on exit)

//
// InternalMemCompareMem
//
// Compares up to `limit` bytes of the buffers at src1 and src2.
//
// In:    r0 = src1, r1 = src2, r2 = limit (byte count).
// Out:   r0 = 0 if no difference was found within `limit` bytes.
//        Otherwise r0 is non-zero and its sign is that of
//        (first differing byte of src1) - (same byte of src2), with the
//        bytes treated as unsigned.  The byte-wise path returns that exact
//        difference; the word-wise path returns a value in -15..15 with the
//        same sign, so callers should rely on the sign / zero test only.
// Saved: r4-r8 and lr are pushed on entry and restored on exit.
// Clobb: r0-r3, r12, flags.
//
// Three paths are used:
//   * src1 and src2 both word aligned        -> word-wise loop.
//   * same non-zero offset within a word     -> round both pointers down,
//                                               neutralise the leading
//                                               bytes, then word-wise loop.
//   * different offsets within a word        -> byte-wise loop.
//
// The word-wise paths load whole aligned 32-bit words, so they may read
// bytes of the containing aligned word that lie before the start or past
// the end of the requested range; those bytes are masked out of the
// comparison.  Byte order handling assumes little-endian data.
//
// `limit` is assumed to be non-zero: with limit == 0 the byte / word count
// wraps on the first decrement and the routine keeps comparing.
// NOTE(review): confirm that every caller filters out zero lengths.
//
// ASM_GLOBAL and ASM_PFX are macros provided outside this file.
//
    .text
    .thumb
    .syntax unified
    .align  5
    // Mark the symbol as a function so the Thumb bit is recorded in the ELF
    // symbol and calls from other objects interwork correctly.
    .type   ASM_PFX(InternalMemCompareMem), %function
ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
ASM_PFX(InternalMemCompareMem):
    push    {r4-r8, lr}
    eor     tmp1, src1, src2
    tst     tmp1, #3                  // Do the low two address bits differ?
    bne     .Lmisaligned4             // Yes: no common word alignment.
    ands    tmp1, src1, #3            // tmp1 = byte offset within the word.
    bne     .Lmutual_align
    add     limit_wd, limit, #3       // limit_wd = ceil(limit / 4) ...
    nop.w                             // Presumably padding so that the loop
                                      // below starts on a 32-byte boundary.
    lsr     limit_wd, limit_wd, #2    // ... number of words to compare.

    // Start of performance-critical section  -- one 32B cache line.
.Lloop_aligned:
    ldr     data1, [src1], #4
    ldr     data2, [src2], #4
.Lstart_realigned:
    subs    limit_wd, limit_wd, #1    // Sets Z when the last word was loaded.
    eor     diff, data1, data2        // Non-zero if differences found.
    cbnz    diff, 0f                  // eor/cbnz leave the flags untouched,
    bne     .Lloop_aligned            // so this still tests the subs result.
    // End of performance-critical section  -- one 32B cache line.

    // If limit_wd is still non-zero the loop stopped on a difference before
    // the final word, so every byte of the current word is significant.
0:  cbnz    limit_wd, .Lnot_limit

    // Final word.  Limit % 4 == 0 => all bytes significant.
    ands    limit, limit, #3
    beq     .Lnot_limit

    // Otherwise only the low (limit % 4) bytes count: clear the rest of both
    // words and force the matching diff bits to one so that the bit search
    // below stops at the end of the significant data.
    lsl     limit, limit, #3              // Bytes -> bits.
    mov     mask, #~0
    lsl     mask, mask, limit
    bic     data1, data1, mask
    bic     data2, data2, mask

    orr     diff, diff, mask

.Lnot_limit:
    // Byte-reverse so that the earliest byte in memory becomes the most
    // significant byte of each register.
    rev     diff, diff
    rev     data1, data1
    rev     data2, data2

    // The MS-non-zero bit of DIFF marks either the first bit
    // that is different, or the end of the significant data.
    // Shifting left now will bring the critical information into the
    // top bits.
    clz     pos, diff
    lsl     data1, data1, pos
    lsl     data2, data2, pos

    // But we need to zero-extend (char is unsigned) the value and then
    // perform a signed 32-bit subtraction.  Only the top four bits of each
    // shifted word are kept, which is enough to give the correct sign.
    lsr     data1, data1, #28
    sub     result, data1, data2, lsr #28
    pop     {r4-r8, pc}

.Lmutual_align:
    // Sources are mutually aligned, but are not currently at an
    // alignment boundary.  Round down the addresses and then mask off
    // the bytes that precede the start point.
    // On entry tmp1 = src1 & 3 (1..3), set by the ands above.
    bic     src1, src1, #3
    bic     src2, src2, #3
    add     limit, limit, tmp1          // Adjust the limit for the extra.
    lsl     tmp1, tmp1, #3              // Bytes beyond alignment -> bits.
    ldr     data1, [src1], #4
    rsb     tmp1, tmp1, #32             // 32 - (bits beyond alignment).
    ldr     data2, [src2], #4
    mov     tmp2, #~0

    // Little-endian.  Early bytes are at LSB.
    lsr     tmp2, tmp2, tmp1            // Ones in the bytes before the start.
    add     limit_wd, limit, #3
    orr     data1, data1, tmp2          // Force those bytes equal in both
    orr     data2, data2, tmp2          // words so they never show in diff.
    lsr     limit_wd, limit_wd, #2
    b       .Lstart_realigned

.Lmisaligned4:
    // Byte-wise comparison.  limit is pre-decremented so that the subs in
    // the loop borrows (clears C) on the final byte.
    sub     limit, limit, #1
1:
    // Perhaps we can do better than this.
    ldrb    data1, [src1], #1
    ldrb    data2, [src2], #1
    subs    limit, limit, #1
    it      cs                          // Bytes remain after this one:
    cmpcs.n data1, data2                // compare.  On the final byte the
                                        // flags from subs are kept (Z clear).
    beq     1b                          // Loop while equal and not at the end.
    sub     result, data1, data2
    pop     {r4-r8, pc}