/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>   /* size_t and the memcmp prototype */
#include <stdint.h>   /* uintptr_t */

#define ENABLE_PREFETCH 1

#define STRNG(X) #X
#define PREFETCH(src_ptr, offset) \
  asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));

#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions?  */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif

#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses?  */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif

#define SIZEOF_reg_t 4
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
#undef SIZEOF_reg_t
#define SIZEOF_reg_t 8
typedef unsigned long long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif

/* This union assumes that small structures can be in registers.  If
   not, then memory accesses will be done - not optimal, but ok.  */
typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;

#define do_bitfield(__i) \
  if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;

/* pull apart the words to find the first differing unsigned byte.  */
static int __attribute__ ((noinline))
do_by_bitfields (reg_t a, reg_t b)
{
  bitfields_t x, y;

  x.v = a;
  y.v = b;
  do_bitfield (0);
  do_bitfield (1);
  do_bitfield (2);
#if SIZEOF_reg_t == 4
  return x.b.B3 - y.b.B3;
#else
  do_bitfield (3);
  do_bitfield (4);
  do_bitfield (5);
  do_bitfield (6);
  return x.b.B7 - y.b.B7;
#endif
}
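/* Note: the bitfield trick above matches memcmp semantics because GCC
   allocates B0..Bn in byte-address order on both big- and little-endian
   MIPS, so B0 is always the lowest-addressed byte of the loaded word.
   A minimal illustration (assumes a 32-bit reg_t on a little-endian
   target; not part of the build):

       reg_t x = 0x44332211;    bytes in memory: 11 22 33 44
       reg_t y = 0x44ff2211;    first difference at byte 2: 0x33 vs 0xff
       do_by_bitfields (x, y);  returns 0x33 - 0xff, i.e. negative  */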
/* This code is called when aligning a pointer, when there are remaining
   bytes after doing word compares, or when the architecture does not have
   some form of unaligned support.  */
static inline int __attribute__ ((always_inline))
do_bytes (const void *a, const void *b, unsigned long len)
{
  unsigned char *x = (unsigned char *) a;
  unsigned char *y = (unsigned char *) b;
  unsigned long i;

  /* 'len' might be zero here, so preloading the first two values before
     the loop may access unallocated memory.  */
  for (i = 0; i < len; i++)
    {
      if (*x != *y)
        return *x - *y;
      x++;
      y++;
    }
  return 0;
}

#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
   the compiler to treat the pointer access as unaligned.  */
struct ulw
{
  reg_t uli;
} __attribute__ ((packed));

/* first pointer is not aligned while second pointer is.  */
static int
unaligned_words (const struct ulw *a, const reg_t *b,
                 unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointer aligned to 32 byte boundary */
  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif
  for (; words >= 16; words -= 8)
    {
#if ENABLE_PREFETCH
      pref_ptr += 8;
      PREFETCH(pref_ptr, 0);
      PREFETCH(pref_ptr, 32);

      pref_ptr_a += 8;
      PREFETCH(pref_ptr_a, 0);
      PREFETCH(pref_ptr_a, 32);
#endif
      reg_t x0 = a[0].uli, x1 = a[1].uli;
      reg_t x2 = a[2].uli, x3 = a[3].uli;
      reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      x0 = a[4].uli; x1 = a[5].uli;
      x2 = a[6].uli; x3 = a[7].uli;
      y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      a += 8;
      b += 8;
    }

  for (; words >= 4; words -= 4)
    {
      reg_t x0 = a[0].uli, x1 = a[1].uli;
      reg_t x2 = a[2].uli, x3 = a[3].uli;
      reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      a += 4;
      b += 4;
    }

  /* do remaining words.  */
  while (words--)
    {
      reg_t x0 = a->uli;
      reg_t y0 = *b;
      a += 1;
      b += 1;
      if (x0 != y0)
        return do_by_bitfields (x0, y0);
    }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}
#else
/* no HW support or unaligned lw/ld/ualw/uald instructions.  */
static int
unaligned_words (const reg_t *a, const reg_t *b,
                 unsigned long words, unsigned long bytes)
{
  return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */
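/* Note on the packed-struct access used by unaligned_words above: reading
   through `struct ulw' is the usual GCC idiom for requesting an unaligned
   word load when no unaligned builtin is available.  A sketch of the idea
   (illustrative only; actual code generation depends on the compiler and
   -march settings):

       const struct ulw *p = (const struct ulw *) unaligned_ptr;
       reg_t v = p->uli;   typically an lwl/lwr (or ldl/ldr) pair on
                           pre-R6 cores, rather than a plain lw/ld that
                           would trap on a misaligned address.  */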
/* both pointers are aligned, or the first isn't but the target has HW
   support for unaligned accesses.  */
static int
aligned_words (const reg_t *a, const reg_t *b,
               unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointer aligned to 32 byte boundary */
  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif
  for (; words >= 24; words -= 12)
    {
#if ENABLE_PREFETCH
      pref_ptr += 12;
      PREFETCH(pref_ptr, 0);
      PREFETCH(pref_ptr, 32);
      PREFETCH(pref_ptr, 64);

      pref_ptr_a += 12;
      PREFETCH(pref_ptr_a, 0);
      PREFETCH(pref_ptr_a, 32);
      PREFETCH(pref_ptr_a, 64);
#endif
      reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
      reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
      y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
      y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      a += 12;
      b += 12;
    }

  for (; words >= 4; words -= 4)
    {
      reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
      reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];

      if (x0 != y0)
        return do_by_bitfields (x0, y0);
      if (x1 != y1)
        return do_by_bitfields (x1, y1);
      if (x2 != y2)
        return do_by_bitfields (x2, y2);
      if (x3 != y3)
        return do_by_bitfields (x3, y3);

      a += 4;
      b += 4;
    }

  /* do remaining words.  */
  while (words--)
    {
      reg_t x0 = *a;
      reg_t y0 = *b;
      a += 1;
      b += 1;
      if (x0 != y0)
        return do_by_bitfields (x0, y0);
    }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}

int
memcmp (const void *a, const void *b, size_t len)
{
  unsigned long bytes, words;

  /* shouldn't hit that often.  */
  if (len < sizeof (reg_t) * 4)
    {
      return do_bytes (a, b, len);
    }

  /* Align the second pointer to word/dword alignment.
     Note that the pointer is only 32-bits for o32/n32 ABIs.  For n32,
     loads are done as 64-bit while the address remains 32-bit.  */
  bytes = ((unsigned long) b) % sizeof (reg_t);
  if (bytes)
    {
      int res;
      bytes = sizeof (reg_t) - bytes;
      if (bytes > len)
        bytes = len;
      res = do_bytes (a, b, bytes);
      if (res || len == bytes)
        return res;
      len -= bytes;
      a = (const void *) (((unsigned char *) a) + bytes);
      b = (const void *) (((unsigned char *) b) + bytes);
    }

  /* Second pointer now aligned.  */
  words = len / sizeof (reg_t);
  bytes = len % sizeof (reg_t);

#if HW_UNALIGNED_SUPPORT
  /* treat possible unaligned first pointer as aligned.  */
  return aligned_words (a, b, words, bytes);
#else
  if (((unsigned long) a) % sizeof (reg_t) == 0)
    {
      return aligned_words (a, b, words, bytes);
    }
  /* need to use unaligned instructions on first pointer.  */
  return unaligned_words (a, b, words, bytes);
#endif
}
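/* A minimal, optional self-check sketch.  The MEMCMP_SELFTEST guard is
   hypothetical (not part of the original build); it exercises the
   short-length byte path and the aligned word path, and checks only the
   sign of the result, which is all that memcmp guarantees.  */
#ifdef MEMCMP_SELFTEST
#include <stdio.h>
#include <stdlib.h>

static int
sign (int v)
{
  return (v > 0) - (v < 0);
}

int
main (void)
{
  static unsigned char x[64], y[64];
  int i;

  for (i = 0; i < 64; i++)
    x[i] = y[i] = (unsigned char) (i * 7 + 1);

  /* equal buffers: short length (byte path) and a longer length with a
     misaligned second pointer (alignment prologue plus word path).  */
  if (memcmp (x, y, 5) != 0 || memcmp (x + 1, y + 1, 48) != 0)
    abort ();

  /* first difference in the middle: the sign must match a plain
     unsigned byte compare.  */
  y[37] = 0xff;
  if (sign (memcmp (x, y, 64)) != sign ((int) x[37] - (int) y[37]))
    abort ();

  puts ("memcmp self-test passed");
  return 0;
}
#endif /* MEMCMP_SELFTEST */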