• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <string.h>
2 #include <stdint.h>
3 #include <endian.h>
4 
/*
 * Helper constants for the GNU C fast path of memcpy() below.
 * They are only defined (and only used) when __GNUC__ is defined.
 */
#ifdef __GNUC__
/*
 * LS / RS are the two shift operators used to splice one destination word
 * out of two consecutive aligned source words: in "(w LS a) | (x RS b)",
 * LS moves the not-yet-stored trailing bytes of w to the front of the result
 * and RS places the leading bytes of x behind them. Which C operator does
 * which depends on the byte order.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

#define MEMCPY_BYTE_BITS 8
/* Word size used for the fast copy, and number of words moved per iteration. */
#define MEMCPY_ALIGH_UNIT_BYTES_4       4
#define MEMCPY_FAST_COPY_UNIT_NUM_4     4
#define MEMCPY_FAST_COPY_UNIT_BYTES (MEMCPY_ALIGH_UNIT_BYTES_4 * MEMCPY_FAST_COPY_UNIT_NUM_4)

#define MEMCPY_ALIGH_UNIT_BITS (MEMCPY_BYTE_BITS * MEMCPY_ALIGH_UNIT_BYTES_4)

/* Byte offsets of the four words inside one 16-byte iteration. */
#define MEMCPY_FAST_COPY_OFFSET_UNIT_0 (MEMCPY_ALIGH_UNIT_BYTES_4 * 0)
#define MEMCPY_FAST_COPY_OFFSET_UNIT_1 (MEMCPY_ALIGH_UNIT_BYTES_4 * 1)
#define MEMCPY_FAST_COPY_OFFSET_UNIT_2 (MEMCPY_ALIGH_UNIT_BYTES_4 * 2)
#define MEMCPY_FAST_COPY_OFFSET_UNIT_3 (MEMCPY_ALIGH_UNIT_BYTES_4 * 3)

/* Minimum remaining length for which the shift-and-merge path is taken. */
#define MEMCPY_NOT_ALIGN_FAST_COPY_THRESHOLD (MEMCPY_FAST_COPY_UNIT_BYTES * 2)

/*
 * For a destination whose address is K bytes past a word boundary:
 *   MEMCPY_OFFSET_BYTES_K        K
 *   MEMCPY_ALIGH_OFFSET_BYTES_K  4 - K, bytes needed to reach the boundary
 *   MEMCPY_OFFSET_BITS_K         K * 8
 *   MEMCPY_OFFSET_ALIGN_BITS_K   (4 - K) * 8
 */
#define MEMCPY_OFFSET_BYTES_1 1
#define MEMCPY_ALIGH_OFFSET_BYTES_1 (MEMCPY_ALIGH_UNIT_BYTES_4 - MEMCPY_OFFSET_BYTES_1)
#define MEMCPY_OFFSET_BITS_1 (MEMCPY_BYTE_BITS * MEMCPY_OFFSET_BYTES_1)
#define MEMCPY_OFFSET_ALIGN_BITS_1 (MEMCPY_BYTE_BITS * MEMCPY_ALIGH_OFFSET_BYTES_1)

#define MEMCPY_OFFSET_BYTES_2 2
#define MEMCPY_ALIGH_OFFSET_BYTES_2 (MEMCPY_ALIGH_UNIT_BYTES_4 - MEMCPY_OFFSET_BYTES_2)
#define MEMCPY_OFFSET_BITS_2 (MEMCPY_BYTE_BITS * MEMCPY_OFFSET_BYTES_2)
#define MEMCPY_OFFSET_ALIGN_BITS_2 (MEMCPY_BYTE_BITS * MEMCPY_ALIGH_OFFSET_BYTES_2)

#define MEMCPY_OFFSET_BYTES_3 3
#define MEMCPY_ALIGH_OFFSET_BYTES_3 (MEMCPY_ALIGH_UNIT_BYTES_4 - MEMCPY_OFFSET_BYTES_3)
#define MEMCPY_OFFSET_BITS_3 (MEMCPY_BYTE_BITS * MEMCPY_OFFSET_BYTES_3)
#define MEMCPY_OFFSET_ALIGN_BITS_3 (MEMCPY_BYTE_BITS * MEMCPY_ALIGH_OFFSET_BYTES_3)

/* Bit masks tested against the remaining length to copy the tail. */
#define MEMCPY_BYTE_CHECK_NUM_1     0x01
#define MEMCPY_BYTE_CHECK_NUM_2     0x02
#define MEMCPY_BYTE_CHECK_NUM_4     0x04
#define MEMCPY_BYTE_CHECK_NUM_8     0x08
#define MEMCPY_BYTE_CHECK_NUM_16    0x10
#endif

/**
 * Copy num bytes from src to dest.
 *
 * With a GNU C compiler the copy is done mostly in 32-bit words:
 *   1. bytes are copied one at a time until src is 4-byte aligned;
 *   2. if dest is then aligned as well, 16 bytes are moved per iteration
 *      and the remainder (< 16 bytes) is finished with 8/4/2/1-byte steps;
 *   3. otherwise, when at least 32 bytes remain, dest is brought to a word
 *      boundary and every destination word is assembled from two aligned
 *      source words with the LS/RS shifts; what is left (< 32 bytes) is
 *      copied byte by byte.
 * Without __GNUC__ a plain byte loop is used.
 *
 * @param dest  destination buffer (restrict-qualified: must not overlap src)
 * @param src   source buffer
 * @param num   number of bytes to copy
 * @return      dest
 */
void *memcpy(void *restrict dest, const void *restrict src, size_t num)
{
    unsigned char *d = dest;
    const unsigned char *s = src;
    size_t n = num;

#ifdef __GNUC__
    /* may_alias lets the word accesses below alias the caller's objects. */
    typedef uint32_t __attribute__((__may_alias__)) u32;
    uint32_t w, x;

    /* Head: byte copy until the source pointer sits on a word boundary. */
    for (; (uintptr_t)s % MEMCPY_ALIGH_UNIT_BYTES_4 && n; n--) {
        *d++ = *s++;
    }

    /* Both pointers word aligned: straight word-to-word copy. */
    if ((uintptr_t)d % MEMCPY_ALIGH_UNIT_BYTES_4 == 0) {
        for (; n >= MEMCPY_FAST_COPY_UNIT_BYTES; s += MEMCPY_FAST_COPY_UNIT_BYTES,
            d += MEMCPY_FAST_COPY_UNIT_BYTES, n -= MEMCPY_FAST_COPY_UNIT_BYTES) {
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_0) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_0);
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_1) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_1);
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_2) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_2);
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_3) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_3);
        }
        /* n < 16 here; its low four bits select the remaining pieces. */
        if (n & MEMCPY_BYTE_CHECK_NUM_8) {
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_0) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_0);
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_1) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_1);
            d += MEMCPY_FAST_COPY_OFFSET_UNIT_2;
            s += MEMCPY_FAST_COPY_OFFSET_UNIT_2;
        }
        if (n & MEMCPY_BYTE_CHECK_NUM_4) {
            *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_0) = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_0);
            d += MEMCPY_FAST_COPY_OFFSET_UNIT_1;
            s += MEMCPY_FAST_COPY_OFFSET_UNIT_1;
        }
        if (n & MEMCPY_BYTE_CHECK_NUM_2) {
            *d++ = *s++;
            *d++ = *s++;
        }
        if (n & MEMCPY_BYTE_CHECK_NUM_1) {
            *d = *s;
        }
        return dest;
    }

    /*
     * dest is misaligned relative to the (aligned) source. Each case first
     * loads the current source word into w, byte-copies just enough to align
     * dest, and then builds every destination word from the leftover bytes
     * of the previous source word and the leading bytes of the next one.
     * The loop bound keeps OFFSET_BYTES_K extra bytes in hand because the
     * last word load of an iteration reaches that far past the 16 bytes
     * being stored.
     */
    if (n >= MEMCPY_NOT_ALIGN_FAST_COPY_THRESHOLD) {
        switch ((uintptr_t)d % MEMCPY_ALIGH_UNIT_BYTES_4) {
            case MEMCPY_OFFSET_BYTES_1:
                w = *(const u32 *)s;
                *d++ = *s++;
                *d++ = *s++;
                *d++ = *s++;
                n -= MEMCPY_ALIGH_OFFSET_BYTES_1;
                for (; n >= MEMCPY_FAST_COPY_UNIT_BYTES + MEMCPY_OFFSET_BYTES_1; s += MEMCPY_FAST_COPY_UNIT_BYTES,
                    d += MEMCPY_FAST_COPY_UNIT_BYTES, n -= MEMCPY_FAST_COPY_UNIT_BYTES) {
                    x = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_0 + MEMCPY_OFFSET_BYTES_1);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_0) =
                        (w LS MEMCPY_OFFSET_ALIGN_BITS_1) | (x RS MEMCPY_OFFSET_BITS_1);
                    w = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_1 + MEMCPY_OFFSET_BYTES_1);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_1) =
                        (x LS MEMCPY_OFFSET_ALIGN_BITS_1) | (w RS MEMCPY_OFFSET_BITS_1);
                    x = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_2 + MEMCPY_OFFSET_BYTES_1);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_2) =
                        (w LS MEMCPY_OFFSET_ALIGN_BITS_1) | (x RS MEMCPY_OFFSET_BITS_1);
                    w = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_3 + MEMCPY_OFFSET_BYTES_1);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_3) =
                        (x LS MEMCPY_OFFSET_ALIGN_BITS_1) | (w RS MEMCPY_OFFSET_BITS_1);
                }
                break;
            case MEMCPY_OFFSET_BYTES_2:
                w = *(const u32 *)s;
                *d++ = *s++;
                *d++ = *s++;
                n -= MEMCPY_ALIGH_OFFSET_BYTES_2;
                for (; n >= MEMCPY_FAST_COPY_UNIT_BYTES + MEMCPY_OFFSET_BYTES_2; s += MEMCPY_FAST_COPY_UNIT_BYTES,
                    d += MEMCPY_FAST_COPY_UNIT_BYTES, n -= MEMCPY_FAST_COPY_UNIT_BYTES) {
                    x = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_0 + MEMCPY_OFFSET_BYTES_2);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_0) =
                        (w LS MEMCPY_OFFSET_ALIGN_BITS_2) | (x RS MEMCPY_OFFSET_BITS_2);
                    w = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_1 + MEMCPY_OFFSET_BYTES_2);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_1) =
                        (x LS MEMCPY_OFFSET_ALIGN_BITS_2) | (w RS MEMCPY_OFFSET_BITS_2);
                    x = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_2 + MEMCPY_OFFSET_BYTES_2);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_2) =
                        (w LS MEMCPY_OFFSET_ALIGN_BITS_2) | (x RS MEMCPY_OFFSET_BITS_2);
                    w = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_3 + MEMCPY_OFFSET_BYTES_2);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_3) =
                        (x LS MEMCPY_OFFSET_ALIGN_BITS_2) | (w RS MEMCPY_OFFSET_BITS_2);
                }
                break;
            case MEMCPY_OFFSET_BYTES_3:
                w = *(const u32 *)s;
                *d++ = *s++;
                n -= MEMCPY_ALIGH_OFFSET_BYTES_3;
                for (; n >= MEMCPY_FAST_COPY_UNIT_BYTES + MEMCPY_OFFSET_BYTES_3; s += MEMCPY_FAST_COPY_UNIT_BYTES,
                    d += MEMCPY_FAST_COPY_UNIT_BYTES, n -= MEMCPY_FAST_COPY_UNIT_BYTES) {
                    x = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_0 + MEMCPY_OFFSET_BYTES_3);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_0) =
                        (w LS MEMCPY_OFFSET_ALIGN_BITS_3) | (x RS MEMCPY_OFFSET_BITS_3);
                    w = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_1 + MEMCPY_OFFSET_BYTES_3);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_1) =
                        (x LS MEMCPY_OFFSET_ALIGN_BITS_3) | (w RS MEMCPY_OFFSET_BITS_3);
                    x = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_2 + MEMCPY_OFFSET_BYTES_3);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_2) =
                        (w LS MEMCPY_OFFSET_ALIGN_BITS_3) | (x RS MEMCPY_OFFSET_BITS_3);
                    w = *(const u32 *)(s + MEMCPY_FAST_COPY_OFFSET_UNIT_3 + MEMCPY_OFFSET_BYTES_3);
                    *(u32 *)(d + MEMCPY_FAST_COPY_OFFSET_UNIT_3) =
                        (x LS MEMCPY_OFFSET_ALIGN_BITS_3) | (w RS MEMCPY_OFFSET_BITS_3);
                }
                break;
            default:
                /* Remainder 0 (aligned dest) already returned above. */
                break;
        }
    }

    /* Tail: fewer than 32 bytes remain; copy them as 16/8/4/2/1-byte runs. */
    if (n & MEMCPY_BYTE_CHECK_NUM_16) {
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    }
    if (n & MEMCPY_BYTE_CHECK_NUM_8) {
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    }
    if (n & MEMCPY_BYTE_CHECK_NUM_4) {
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    }
    if (n & MEMCPY_BYTE_CHECK_NUM_2) {
        *d++ = *s++; *d++ = *s++;
    }
    if (n & MEMCPY_BYTE_CHECK_NUM_1) {
        *d = *s;
    }
    return dest;
#else
    /* Portable fallback: plain byte-by-byte copy. */
    for (; n; n--) {
        *d++ = *s++;
    }
    return dest;
#endif
}
184