1 #include "crc32c.h"
2
/*
 * One CRC32C step on each of three interleaved lanes.
 *
 * Relies on names from the expanding scope: `data` (byte pointer to the
 * start of the lanes) and the accumulators `crc0`, `crc1`, `crc2`.
 * Lane k starts 42 64-bit words after lane k-1; ITR selects the word
 * inside each lane. crc0 is updated last, after crc1 and crc2.
 *
 * Expands to three complete statements, trailing semicolon included, so
 * it is invoked without a semicolon of its own.
 */
#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, ((const uint64_t *)data)[1 * 42 + (ITR)]); \
	crc2 = __crc32cd(crc2, ((const uint64_t *)data)[2 * 42 + (ITR)]); \
	crc0 = __crc32cd(crc0, ((const uint64_t *)data)[0 * 42 + (ITR)]);
7
/*
 * Seven back-to-back CRC32C3X8 steps covering word indexes
 * 7*ITR .. 7*ITR+6 of every lane.
 *
 * CRC32C3X8 supplies its own statement terminators, hence no semicolons
 * between the invocations. The do/while(0) wrapper makes the whole
 * expansion usable as a single statement.
 */
#define CRC32C7X3X8(ITR) \
	do { \
		CRC32C3X8(7 * (ITR) + 0) \
		CRC32C3X8(7 * (ITR) + 1) \
		CRC32C3X8(7 * (ITR) + 2) \
		CRC32C3X8(7 * (ITR) + 3) \
		CRC32C3X8(7 * (ITR) + 4) \
		CRC32C3X8(7 * (ITR) + 5) \
		CRC32C3X8(7 * (ITR) + 6) \
	} while (0)
17
/*
 * AT_HWCAP bit tested by crc32c_arm64_probe(); defined here only when no
 * earlier header has already provided it.
 *
 * NOTE(review): this guard is evaluated before <sys/auxv.h> is included
 * further down, so within this file it can only observe a definition
 * coming from "crc32c.h".
 */
#if !defined(HWCAP_CRC32)
#define HWCAP_CRC32 (1 << 7)
#endif
21
/*
 * Non-zero once crc32c_arm64_probe() has detected the required CPU support
 * through the auxiliary vector. Stays 0 otherwise, including builds without
 * ARCH_HAVE_ARM64_CRC_CRYPTO, where nothing in this file writes to it.
 */
int crc32c_arm64_available = 0;
23
24 #ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
25
26 #include <sys/auxv.h>
27 #include <arm_acle.h>
28 #include <arm_neon.h>
29
/* Set to 1 after the first crc32c_arm64_probe() call so the check runs once. */
static int crc32c_probed;
31
/*
 * CRC32C over `length` bytes starting at `data`, using the ARMv8 CRC32C
 * intrinsics, with PMULL (vmull_p64) used to combine partial results.
 *
 * The running value starts at ~0 and is returned as computed; no final
 * inversion is applied in this function.
 *
 * The input is consumed in three stages:
 *   1. whole 1024-byte blocks, each processed "by 3": the first word, then
 *      three lanes of 42 words accumulated independently in crc0/crc1/crc2,
 *      then the last word while the lanes are folded together;
 *   2. the remaining whole 8-byte words;
 *   3. a 4-, 2- and 1-byte tail.
 *
 * Words are loaded through casts of `data`; no alignment is enforced here.
 */
uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
{
	/* Fold constants: K1 is applied to crc0, K2 to crc1 */
	const poly64_t k1 = 0xe417f38a;
	const poly64_t k2 = 0x8f158014;
	signed long remaining = length;
	uint32_t sum = ~0;
	uint32_t crc0, crc1, crc2;	/* names are required by CRC32C7X3X8 */
	uint64_t fold0, fold1;

	for (remaining -= 1024; remaining >= 0; remaining -= 1024) {
		/* First word is done up front for better pipelining */
		crc0 = __crc32cd(sum, *(const uint64_t *)data);
		crc1 = 0;
		crc2 = 0;
		data += sizeof(uint64_t);

		/*
		 * 6 x 7 = 42 words per lane, fully unrolled. Within each step
		 * crc0 goes last to avoid a dependency on the load above.
		 */
		CRC32C7X3X8(0);
		CRC32C7X3X8(1);
		CRC32C7X3X8(2);
		CRC32C7X3X8(3);
		CRC32C7X3X8(4);
		CRC32C7X3X8(5);

		data += 42 * 3 * sizeof(uint64_t);

		/*
		 * Merge crc0 and crc1 into crc2: crc1 is multiplied by K2,
		 * crc0 by K1, while crc2 absorbs the last word of the block.
		 */
		fold1 = (uint64_t)vmull_p64(crc1, k2);
		fold0 = (uint64_t)vmull_p64(crc0, k1);
		sum = __crc32cd(crc2, *(const uint64_t *)data);
		sum ^= __crc32cd(0, fold1);
		sum ^= __crc32cd(0, fold0);

		data += sizeof(uint64_t);
	}

	/* Undo the subtraction that ended the loop; 0 means nothing is left */
	remaining += 1024;
	if (remaining == 0)
		return sum;

	for (remaining -= sizeof(uint64_t); remaining >= 0;
	     remaining -= sizeof(uint64_t)) {
		sum = __crc32cd(sum, *(const uint64_t *)data);
		data += sizeof(uint64_t);
	}

	/*
	 * `remaining` is now (leftover bytes - 8), so its low three bits are
	 * the leftover byte count. Testing them one by one is more efficient
	 * than a byte-at-a-time loop.
	 */
	if (remaining & sizeof(uint32_t)) {
		sum = __crc32cw(sum, *(const uint32_t *)data);
		data += sizeof(uint32_t);
	}
	if (remaining & sizeof(uint16_t)) {
		sum = __crc32ch(sum, *(const uint16_t *)data);
		data += sizeof(uint16_t);
	}
	if (remaining & sizeof(uint8_t))
		sum = __crc32cb(sum, *(const uint8_t *)data);

	return sum;
}
102
crc32c_arm64_probe(void)103 void crc32c_arm64_probe(void)
104 {
105 unsigned long hwcap;
106
107 if (!crc32c_probed) {
108 hwcap = getauxval(AT_HWCAP);
109 if (hwcap & HWCAP_CRC32)
110 crc32c_arm64_available = 1;
111 crc32c_probed = 1;
112 }
113 }
114
115 #endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */
116