// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_UTILS_MEMCOPY_H_
#define V8_UTILS_MEMCOPY_H_

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <type_traits>

#include "src/base/logging.h"
#include "src/base/macros.h"

namespace v8 {
namespace internal {

using Address = uintptr_t;

// ----------------------------------------------------------------------------
// Generated memcpy/memmove for ia32, arm, and mips.

void init_memcopy_functions();

#if defined(V8_TARGET_ARCH_IA32)
// Limit below which the extra overhead of the MemCopy function is likely
// to outweigh the benefits of faster copying.
const size_t kMinComplexMemCopy = 64;

// Copy memory area. No restrictions.
V8_EXPORT_PRIVATE void MemMove(void* dest, const void* src, size_t size);
using MemMoveFunction = void (*)(void* dest, const void* src, size_t size);

// Keep the distinction of "move" vs. "copy" for the benefit of other
// architectures.
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  MemMove(dest, src, size);
}
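
// Usage sketch (illustrative, not part of the original header): on ia32,
// MemCopy simply forwards to MemMove, so overlapping ranges happen to work
// here; callers must not rely on that on other architectures.
//
//   char buffer[kMinComplexMemCopy];
//   MemCopy(buffer, source, sizeof(buffer));          // Disjoint ranges only.
//   MemMove(buffer + 1, buffer, sizeof(buffer) - 1);  // Overlap needs MemMove.
//
// {source} is a hypothetical caller-provided pointer.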
#elif defined(V8_HOST_ARCH_ARM)
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
                                      size_t size);
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// For values < 16, the assembler function is slower than the inlined C code.
const size_t kMinComplexMemCopy = 16;
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  (*memcopy_uint8_function)(reinterpret_cast<uint8_t*>(dest),
                            reinterpret_cast<const uint8_t*>(src), size);
}
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}

// For values < 12, the assembler function is slower than the inlined C code.
const int kMinComplexConvertMemCopy = 12;
#elif defined(V8_HOST_ARCH_MIPS)
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
                                      size_t size);
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// For values < 16, the assembler function is slower than the inlined C code.
const size_t kMinComplexMemCopy = 16;
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  (*memcopy_uint8_function)(reinterpret_cast<uint8_t*>(dest),
                            reinterpret_cast<const uint8_t*>(src), size);
}
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}
#else
// Copy memory area to disjoint memory area.
inline void MemCopy(void* dest, const void* src, size_t size) {
  // Fast path for small sizes. The compiler will expand the {memcpy} for small
  // fixed sizes to a sequence of move instructions. This avoids the overhead
  // of the general {memcpy} function.
  switch (size) {
#define CASE(N)           \
  case N:                 \
    memcpy(dest, src, N); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      memcpy(dest, src, size);
      return;
  }
}
V8_EXPORT_PRIVATE inline void MemMove(void* dest, const void* src,
                                      size_t size) {
  // Fast path for small sizes. The compiler will expand the {memmove} for
  // small fixed sizes to a sequence of move instructions. This avoids the
  // overhead of the general {memmove} function.
  switch (size) {
#define CASE(N)            \
  case N:                  \
    memmove(dest, src, N); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      memmove(dest, src, size);
      return;
  }
}
const size_t kMinComplexMemCopy = 8;
#endif  // V8_TARGET_ARCH_IA32
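
// A minimal caller sketch (illustrative only): when {size} is a compile-time
// constant <= 16, the switch above lets the compiler turn the call into a few
// move instructions instead of a library call. {Pair} is a hypothetical type,
// assumed to be 8 bytes on typical targets, so this hits CASE(8).
//
//   struct Pair { int a, b; };
//   Pair src_pair{1, 2}, dst_pair;
//   MemCopy(&dst_pair, &src_pair, sizeof(Pair));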

// Copies words from |src| to |dst|. The data spans must not overlap.
// |src| and |dst| must be TWord-size aligned.
template <size_t kBlockCopyLimit, typename T>
inline void CopyImpl(T* dst_ptr, const T* src_ptr, size_t count) {
  constexpr int kTWordSize = sizeof(T);
#ifdef DEBUG
  Address dst = reinterpret_cast<Address>(dst_ptr);
  Address src = reinterpret_cast<Address>(src_ptr);
  DCHECK(IsAligned(dst, kTWordSize));
  DCHECK(IsAligned(src, kTWordSize));
  DCHECK(((src <= dst) && ((src + count * kTWordSize) <= dst)) ||
         ((dst <= src) && ((dst + count * kTWordSize) <= src)));
#endif
  if (count == 0) return;

  // Use block copying MemCopy if the segment we're copying is big enough
  // to justify the extra call/setup overhead.
  if (count < kBlockCopyLimit) {
    do {
      count--;
      *dst_ptr++ = *src_ptr++;
    } while (count > 0);
  } else {
    MemCopy(dst_ptr, src_ptr, count * kTWordSize);
  }
}
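
// For example (illustrative counts only):
//
//   CopyImpl<16>(dst_words, src_words, 10);   // 10 < 16: inline copy loop.
//   CopyImpl<16>(dst_words, src_words, 100);  // 100 >= 16: one MemCopy call
//                                             // of 100 * sizeof(T) bytes.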

// Copies kSystemPointerSize-sized words from |src| to |dst|. The data spans
// must not overlap. |src| and |dst| must be kSystemPointerSize-aligned.
inline void CopyWords(Address dst, const Address src, size_t num_words) {
  static const size_t kBlockCopyLimit = 16;
  CopyImpl<kBlockCopyLimit>(reinterpret_cast<Address*>(dst),
                            reinterpret_cast<const Address*>(src), num_words);
}
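
// Usage sketch (hypothetical buffers, for illustration): both addresses must
// be word-aligned and the ranges disjoint.
//
//   Address src_words[8] = {1, 2, 3, 4, 5, 6, 7, 8};
//   Address dst_words[8];
//   CopyWords(reinterpret_cast<Address>(dst_words),
//             reinterpret_cast<Address>(src_words), 8);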

// Copies data from |src| to |dst|. The data spans must not overlap.
template <typename T>
inline void CopyBytes(T* dst, const T* src, size_t num_bytes) {
  STATIC_ASSERT(sizeof(T) == 1);
  if (num_bytes == 0) return;
  CopyImpl<kMinComplexMemCopy>(dst, src, num_bytes);
}
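
// Usage sketch (illustrative): only byte-sized element types are accepted,
// enforced by the STATIC_ASSERT above.
//
//   uint8_t from[32] = {0};
//   uint8_t to[32];
//   CopyBytes(to, from, sizeof(from));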

inline void MemsetUint32(uint32_t* dest, uint32_t value, size_t counter) {
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
#define STOS "stosl"
#endif

#if defined(MEMORY_SANITIZER)
// MemorySanitizer does not understand inline assembly.
#undef STOS
#endif

#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  for (size_t i = 0; i < counter; i++) {
    dest[i] = value;
  }
#endif

#undef STOS
}
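
// Usage sketch (illustrative): fills {counter} 32-bit slots with {value},
// via "rep stosl" on x86 GCC-compatible hosts and a plain loop elsewhere.
//
//   uint32_t slots[64];
//   MemsetUint32(slots, 0xDEADBEEF, arraysize(slots));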

inline void MemsetPointer(Address* dest, Address value, size_t counter) {
#if V8_HOST_ARCH_IA32
#define STOS "stosl"
#elif V8_HOST_ARCH_X64
#define STOS "stosq"
#endif

#if defined(MEMORY_SANITIZER)
// MemorySanitizer does not understand inline assembly.
#undef STOS
#endif

#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  for (size_t i = 0; i < counter; i++) {
    dest[i] = value;
  }
#endif

#undef STOS
}

template <typename T, typename U>
inline void MemsetPointer(T** dest, U* value, size_t counter) {
#ifdef DEBUG
  T* a = nullptr;
  U* b = nullptr;
  a = b;  // Fake assignment to check assignability.
  USE(a);
#endif  // DEBUG
  MemsetPointer(reinterpret_cast<Address*>(dest),
                reinterpret_cast<Address>(value), counter);
}
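
// Usage sketch (hypothetical types, for illustration): the debug-only fake
// assignment above rejects calls where {U*} is not assignable to {T*}.
//
//   struct Base {};
//   struct Derived : Base {};
//   Base* slots[4];
//   Derived derived;
//   MemsetPointer(slots, &derived, arraysize(slots));  // Derived* -> Base* OK.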

// Copy from 8bit/16bit chars to 8bit/16bit chars. Values are zero-extended if
// needed. Ranges are not allowed to overlap.
// The separate declaration is needed for the V8_NONNULL, which is not allowed
// on a definition.
template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) V8_NONNULL(1, 2);

template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) {
  STATIC_ASSERT(std::is_integral<SrcType>::value);
  STATIC_ASSERT(std::is_integral<DstType>::value);
  using SrcTypeUnsigned = typename std::make_unsigned<SrcType>::type;
  using DstTypeUnsigned = typename std::make_unsigned<DstType>::type;

#ifdef DEBUG
  // Check for no overlap, otherwise {std::copy_n} cannot be used.
  Address src_start = reinterpret_cast<Address>(src);
  Address src_end = src_start + count * sizeof(SrcType);
  Address dst_start = reinterpret_cast<Address>(dst);
  Address dst_end = dst_start + count * sizeof(DstType);
  DCHECK(src_end <= dst_start || dst_end <= src_start);
#endif

  auto* dst_u = reinterpret_cast<DstTypeUnsigned*>(dst);
  auto* src_u = reinterpret_cast<const SrcTypeUnsigned*>(src);

  // Especially Atom CPUs profit from this explicit instantiation for small
  // counts. This gives up to 20 percent improvement for microbenchmarks such
  // as joining an array of small integers (2019-10-16).
  switch (count) {
#define CASE(N)                   \
  case N:                         \
    std::copy_n(src_u, N, dst_u); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      std::copy_n(src_u, count, dst_u);
      return;
  }
}
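
// Usage sketch (illustrative): widening one-byte (Latin-1) characters into
// two-byte characters; each value is zero-extended via the unsigned types.
//
//   uint8_t narrow[4] = {'V', '8', '!', '\0'};
//   uint16_t wide[4];
//   CopyChars(wide, narrow, 4);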

}  // namespace internal
}  // namespace v8

#endif  // V8_UTILS_MEMCOPY_H_