// Fast memory copying and comparison routines.
//   strings::fastmemcmp_inlined() replaces memcmp()
//   strings::memcpy_inlined() replaces memcpy()
//   strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0
//
// strings::*_inlined() routines are inline versions of the
// routines exported by this module. Sometimes using the inlined
// versions is faster. Measure before using the inlined versions.

#ifndef DYNAMIC_DEPTH_INTERNAL_STRINGS_FASTMEM_H_  // NOLINT
#define DYNAMIC_DEPTH_INTERNAL_STRINGS_FASTMEM_H_  // NOLINT

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "base/integral_types.h"
#include "base/macros.h"
#include "base/port.h"

namespace dynamic_depth {
namespace strings {

// Return true if the n bytes at a equal the n bytes at b.
// The regions are allowed to overlap.
//
// The performance is similar to the performance of memcmp(), but faster for
// moderately-sized inputs, or inputs that share a common prefix and differ
// somewhere in their last 8 bytes. Further optimizations can be added later
// if it makes sense to do so. Alternatively, if the compiler & runtime improve
// to eliminate the need for this, we can remove it. Please keep this in sync
// with google_internal::gg_memeq() in //third_party/stl/gcc3/string.
inline bool memeq(const char* a, const char* b, size_t n) {
  size_t n_rounded_down = n & ~static_cast<size_t>(7);
  if (PREDICT_FALSE(n_rounded_down == 0)) {  // n <= 7
    return memcmp(a, b, n) == 0;
  }
  // n >= 8
  uint64 u = UNALIGNED_LOAD64(a) ^ UNALIGNED_LOAD64(b);
  uint64 v = UNALIGNED_LOAD64(a + n - 8) ^ UNALIGNED_LOAD64(b + n - 8);
  if ((u | v) != 0) {  // The first or last 8 bytes differ.
    return false;
  }
  // The next line forces n to be a multiple of 8.
  n = n_rounded_down;
  if (n >= 80) {
    // In 2013 or later, this should be fast on long strings.
    return memcmp(a, b, n) == 0;
  }
  // Now force n to be a multiple of 16. Arguably, a "switch" would be smart
  // here, but there's a difficult-to-evaluate code size vs. speed issue. The
  // current approach often re-compares some bytes (worst case is if n
  // initially was 16, 32, 48, or 64), but is fairly short.
  size_t e = n & 8;
  a += e;
  b += e;
  n -= e;
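  // Worked example of the adjustment above (illustrative): if n == 24, then
  // e == 8, so a and b skip past 8 bytes already covered by the initial
  // 8-byte probe of u, leaving n == 16 for exactly one loop iteration below.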
  // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks.
  while (n > 0) {
    uint64 x = UNALIGNED_LOAD64(a) ^ UNALIGNED_LOAD64(b);
    uint64 y = UNALIGNED_LOAD64(a + 8) ^ UNALIGNED_LOAD64(b + 8);
    if ((x | y) != 0) {
      return false;
    }
    a += 16;
    b += 16;
    n -= 16;
  }
  return true;
}
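
// A minimal usage sketch for memeq(); the buffer names here are
// illustrative, not part of this API:
//
//   char copy[8];
//   memcpy(copy, "abcdefgh", 8);
//   bool same = strings::memeq("abcdefgh", copy, 8);  // true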

// Compares the first n bytes at va and vb like memcmp(), handling small
// sizes (n <= 7) with an unrolled fall-through switch and deferring larger
// sizes to memcmp().
inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) {
  const unsigned char* pa = static_cast<const unsigned char*>(va);
  const unsigned char* pb = static_cast<const unsigned char*>(vb);
  switch (n) {
    default:
      return memcmp(va, vb, n);
    case 7:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      ++pa;
      ++pb;
      FALLTHROUGH_INTENDED;
    case 6:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      ++pa;
      ++pb;
      FALLTHROUGH_INTENDED;
    case 5:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      ++pa;
      ++pb;
      FALLTHROUGH_INTENDED;
    case 4:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      ++pa;
      ++pb;
      FALLTHROUGH_INTENDED;
    case 3:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      ++pa;
      ++pb;
      FALLTHROUGH_INTENDED;
    case 2:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      ++pa;
      ++pb;
      FALLTHROUGH_INTENDED;
    case 1:
      if (*pa != *pb) return *pa < *pb ? -1 : +1;
      FALLTHROUGH_INTENDED;
    case 0:
      break;
  }
  return 0;
}
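
// Illustrative sketch only: fastmemcmp_inlined() follows memcmp()'s
// sign-based contract (negative, zero, or positive):
//
//   int r = strings::fastmemcmp_inlined("abc", "abd", 3);  // r < 0
//   int s = strings::fastmemcmp_inlined("abc", "abc", 3);  // s == 0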

// The standard memcpy operation is slow for variable small sizes.
// This implementation inlines the optimal realization for sizes 1 to 16.
// To avoid code bloat, don't use this in spots that are not
// performance-critical, or where sizes <= 16 are not expected to be frequent.
inline void memcpy_inlined(char* dst, const char* src, size_t size) {
  // Compiler inlines code with minimal amount of data movement when third
  // parameter of memcpy is a constant.
  switch (size) {
    case 1:
      memcpy(dst, src, 1);
      break;
    case 2:
      memcpy(dst, src, 2);
      break;
    case 3:
      memcpy(dst, src, 3);
      break;
    case 4:
      memcpy(dst, src, 4);
      break;
    case 5:
      memcpy(dst, src, 5);
      break;
    case 6:
      memcpy(dst, src, 6);
      break;
    case 7:
      memcpy(dst, src, 7);
      break;
    case 8:
      memcpy(dst, src, 8);
      break;
    case 9:
      memcpy(dst, src, 9);
      break;
    case 10:
      memcpy(dst, src, 10);
      break;
    case 11:
      memcpy(dst, src, 11);
      break;
    case 12:
      memcpy(dst, src, 12);
      break;
    case 13:
      memcpy(dst, src, 13);
      break;
    case 14:
      memcpy(dst, src, 14);
      break;
    case 15:
      memcpy(dst, src, 15);
      break;
    case 16:
      memcpy(dst, src, 16);
      break;
    default:
      memcpy(dst, src, size);
      break;
  }
}
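
// Illustrative sketch (dst, record, and len are hypothetical names): a
// typical use is copying short, variable-length fields where sizes <= 16
// dominate:
//
//   char dst[16];
//   const char* record = ...;  // some short, variable-length field
//   size_t len = ...;          // usually <= 16
//   strings::memcpy_inlined(dst, record, len);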

}  // namespace strings
}  // namespace dynamic_depth

#endif  // DYNAMIC_DEPTH_INTERNAL_STRINGS_FASTMEM_H_  // NOLINT