/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <stdint.h>

#define ENABLE_PREFETCH 1

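/* STRNG stringizes the byte offset so it can be pasted into the asm
   template; PREFETCH issues a MIPS "pref" with hint 0 (the plain load
   hint) at the given offset from src_ptr.  */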
#define STRNG(X) #X
#define PREFETCH(src_ptr, offset)  \
  asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));

#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif

#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses?  */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif

#define SIZEOF_reg_t 4
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
#undef SIZEOF_reg_t
#define SIZEOF_reg_t 8
typedef unsigned long long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif
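
/* The byte-by-byte comparison below assumes bit-fields are allocated in
   the target's byte order (as MIPS GCC does), so B0 always names the
   lowest-addressed byte of a loaded word: the most significant byte on
   big-endian targets, the least significant on little-endian ones.  */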

/* This union assumes that small structures can be in registers.  If
   not, then memory accesses will be done - not optimal, but ok.  */
typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;

#define do_bitfield(__i) \
  if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;

/* pull apart the words to find the first differing unsigned byte.  */
static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
{
  bitfields_t x, y;
  x.v = a;
  y.v = b;
  do_bitfield (0);
  do_bitfield (1);
  do_bitfield (2);
#if SIZEOF_reg_t == 4
  return x.b.B3 - y.b.B3;
#else
  do_bitfield (3);
  do_bitfield (4);
  do_bitfield (5);
  do_bitfield (6);
  return x.b.B7 - y.b.B7;
#endif
}

/* This code is called when aligning a pointer, when there are bytes
   remaining after the word compares, or when the architecture has no
   form of unaligned support.  */
static inline int __attribute__ ((always_inline))
do_bytes (const void *a, const void *b, unsigned long len)
{
  unsigned char *x = (unsigned char *) a;
  unsigned char *y = (unsigned char *) b;
  unsigned long i;

  /* 'len' might be zero here, so preloading the first two values
     before the loop may access unallocated memory.  */
  for (i = 0; i < len; i++) {
    if (*x != *y)
      return *x - *y;
    x++;
    y++;
  }
  return 0;
}

#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
   the compiler to treat the pointer access as unaligned.  */
struct ulw
{
  reg_t uli;
} __attribute__ ((packed));
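/* Reading uli through this packed struct is what makes GCC emit an
   unaligned load sequence (e.g. lwl/lwr or ldl/ldr on pre-R6 targets)
   instead of a plain lw/ld.  */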

/* first pointer is not aligned while second pointer is.  */
static int unaligned_words (const struct ulw *a, const reg_t *b,
                            unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointer aligned to 32 byte boundary */
  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif
  for (; words >= 16; words -= 8) {
#if ENABLE_PREFETCH
    pref_ptr += 8;
    PREFETCH(pref_ptr, 0);
    PREFETCH(pref_ptr, 32);

    pref_ptr_a += 8;
    PREFETCH(pref_ptr_a, 0);
    PREFETCH(pref_ptr_a, 32);
#endif
    reg_t x0 = a[0].uli, x1 = a[1].uli;
    reg_t x2 = a[2].uli, x3 = a[3].uli;
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    x0 = a[4].uli; x1 = a[5].uli;
    x2 = a[6].uli; x3 = a[7].uli;
    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    a += 8;
    b += 8;
  }

  for (; words >= 4; words -= 4) {
    reg_t x0 = a[0].uli, x1 = a[1].uli;
    reg_t x2 = a[2].uli, x3 = a[3].uli;
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);
    a += 4;
    b += 4;
  }

  /* do remaining words.  */
  while (words--) {
    reg_t x0 = a->uli;
    reg_t y0 = *b;
    a += 1;
    b += 1;
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
  }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}
#else
/* neither HW unaligned support nor unaligned lw/ld/ualw/uald instructions.  */
static int unaligned_words (const reg_t *a, const reg_t *b,
                            unsigned long words, unsigned long bytes)
{
  return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */

/* both pointers are aligned, or the first isn't but there is HW support
   for unaligned accesses.  */
static int aligned_words (const reg_t *a, const reg_t *b,
                          unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointer aligned to 32 byte boundary */
  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif

  for (; words >= 24; words -= 12) {
#if ENABLE_PREFETCH
    pref_ptr += 12;
    PREFETCH(pref_ptr, 0);
    PREFETCH(pref_ptr, 32);
    PREFETCH(pref_ptr, 64);

    pref_ptr_a += 12;
    PREFETCH(pref_ptr_a, 0);
    PREFETCH(pref_ptr_a, 32);
    PREFETCH(pref_ptr_a, 64);
#endif
    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
    y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    a += 12;
    b += 12;
  }

  for (; words >= 4; words -= 4) {
    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);
    a += 4;
    b += 4;
  }

  /* do remaining words.  */
  while (words--) {
    reg_t x0 = *a;
    reg_t y0 = *b;
    a += 1;
    b += 1;
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
  }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}

int memcmp (const void *a, const void *b, size_t len)
{
  unsigned long bytes, words;

  /* this case shouldn't be hit often.  */
  if (len < sizeof (reg_t) * 4) {
    return do_bytes (a, b, len);
  }

  /* Align the second pointer to word/dword alignment.
     Note that the pointer is only 32-bit for the o32/n32 ABIs.  For
     n32, loads are done as 64-bit while the address remains 32-bit.  */
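  /* For illustration, assuming a 32-bit reg_t: if b == 0x1003, then
     bytes == 3 below, so a single leading byte is compared separately
     and b advances to the word-aligned address 0x1004.  */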
  bytes = ((unsigned long) b) % sizeof (reg_t);
  if (bytes) {
    int res;
    bytes = sizeof (reg_t) - bytes;
    if (bytes > len)
      bytes = len;
    res = do_bytes (a, b, bytes);
    if (res || len == bytes)
      return res;
    len -= bytes;
    a = (const void *) (((unsigned char *) a) + bytes);
    b = (const void *) (((unsigned char *) b) + bytes);
  }

  /* Second pointer now aligned.  */
  words = len / sizeof (reg_t);
  bytes = len % sizeof (reg_t);

#if HW_UNALIGNED_SUPPORT
  /* treat a possibly unaligned first pointer as aligned.  */
  return aligned_words (a, b, words, bytes);
#else
  if (((unsigned long) a) % sizeof (reg_t) == 0) {
    return aligned_words (a, b, words, bytes);
  }
  /* need to use unaligned instructions on first pointer.  */
  return unaligned_words (a, b, words, bytes);
#endif
}
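
/* A minimal self-test sketch, compiled only when MEMCMP_SELFTEST is
   defined by hand; the guard name and the test data are illustrative
   assumptions, not part of the exported interface.  */
#ifdef MEMCMP_SELFTEST
#include <assert.h>
#include <stdio.h>

int main (void)
{
  /* y is x shifted by one byte, so x and y + 1 hold identical data but
     usually differ in alignment, which tends to exercise the
     unaligned-first-pointer path on targets without HW support.  */
  char x[] = "abcdefghijklmnopqrstuvwxyz0123456789";
  char y[] = "zabcdefghijklmnopqrstuvwxyz0123456789";

  assert (memcmp (x, y + 1, sizeof x) == 0);

  /* introduce a difference past the first word: 'Z' (0x5a) > '4' (0x34).  */
  x[30] = 'Z';
  assert (memcmp (x, y + 1, sizeof x) > 0);
  assert (memcmp (y + 1, x, sizeof x) < 0);

  /* short lengths take the pure byte-compare path.  */
  assert (memcmp (x, y + 1, 4) == 0);

  puts ("memcmp self-test passed");
  return 0;
}
#endif /* MEMCMP_SELFTEST */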