/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkChecksum_opts_DEFINED
#define SkChecksum_opts_DEFINED

#include "include/core/SkTypes.h"
#include "include/private/SkChecksum.h"
#include "src/core/SkUtils.h"   // sk_unaligned_load

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    #include <immintrin.h>
#elif defined(SK_ARM_HAS_CRC32)
    #include <arm_acle.h>
#endif

namespace SK_OPTS_NS {

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
    // This is not a CRC32.  It's Just A Hash that uses those instructions because they're fast.
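    // (The SSE 4.2 crc32 instructions compute CRC-32C; here they serve purely as a cheap,
    // well-mixing primitive, so the result is not a standard CRC of the input.)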
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
        auto data = (const uint8_t*)vdata;

        // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
        uint64_t hash = seed;
        if (bytes >= 24) {
            // We'll create 3 independent hashes, each using _mm_crc32_u64()
            // to hash 8 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint64_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = _mm_crc32_u64(a, sk_unaligned_load<uint64_t>(data+ 0));
                b = _mm_crc32_u64(b, sk_unaligned_load<uint64_t>(data+ 8));
                c = _mm_crc32_u64(c, sk_unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;
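            // _mm_crc32_u64() leaves its result in the low 32 bits (the upper 32 are zero),
            // so folding the three lanes together with _mm_crc32_u32() loses nothing.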
            hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
            bytes -= 8;
            data  += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
            data  += 8;
        }

        // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
        // We don't lose anything here: only the bottom 32-bits were populated.
        auto hash32 = (uint32_t)hash;

        if (bytes & 4) {
            hash32 = _mm_crc32_u32(hash32, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash32 = _mm_crc32_u16(hash32, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash32 = _mm_crc32_u8(hash32, sk_unaligned_load<uint8_t>(data));
        }
        return hash32;
    }

#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64().
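    // (_mm_crc32_u64() needs 64-bit general-purpose registers, so it is only available when
    // targeting x86-64; 32-bit x86 builds take this path instead.)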
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        if (bytes >= 12) {
            // We'll create 3 independent hashes, each using _mm_crc32_u32()
            // to hash 4 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/12;
            while (steps --> 0) {
                a = _mm_crc32_u32(a, sk_unaligned_load<uint32_t>(data+0));
                b = _mm_crc32_u32(b, sk_unaligned_load<uint32_t>(data+4));
                c = _mm_crc32_u32(c, sk_unaligned_load<uint32_t>(data+8));
                data += 12;
            }
            bytes %= 12;
            hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
        }

        SkASSERT(bytes < 12);
        if (bytes >= 8) {
            hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data));
            bytes -= 4;
            data  += 4;
        }

        SkASSERT(bytes < 8);
        if (bytes & 4) {
            hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = _mm_crc32_u16(hash, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = _mm_crc32_u8(hash, sk_unaligned_load<uint8_t>(data));
        }
        return hash;
    }

#elif defined(SK_ARM_HAS_CRC32)
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;
        if (bytes >= 24) {
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = __crc32d(a, sk_unaligned_load<uint64_t>(data+ 0));
                b = __crc32d(b, sk_unaligned_load<uint64_t>(data+ 8));
                c = __crc32d(c, sk_unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;
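            // __crc32d() already returns a 32-bit CRC, so a __crc32w() step is enough to fold
            // the three lanes back into one hash.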
            hash = __crc32w(a, __crc32w(b, c));
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data));
            bytes -= 8;
            data  += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data));
            data += 8;
        }
        if (bytes & 4) {
            hash = __crc32w(hash, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = __crc32h(hash, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = __crc32b(hash, sk_unaligned_load<uint8_t>(data));
        }
        return hash;
    }

#else
    // This is Murmur3.
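    // Portable fallback for builds without SSE 4.2 or the ARMv8 CRC32 extension.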
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        size_t original_bytes = bytes;

        // Handle 4 bytes at a time while possible.
        while (bytes >= 4) {
            uint32_t k = sk_unaligned_load<uint32_t>(data);
            k *= 0xcc9e2d51;
            k = (k << 15) | (k >> 17);
            k *= 0x1b873593;

            hash ^= k;
            hash = (hash << 13) | (hash >> 19);
            hash *= 5;
            hash += 0xe6546b64;

            bytes -= 4;
            data  += 4;
        }

        // Handle last 0-3 bytes.
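        // (The cases below intentionally fall through: each higher case adds one more tail byte
        // to k before the shared mixing steps.)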
        uint32_t k = 0;
        switch (bytes & 3) {
            case 3: k ^= data[2] << 16;
            case 2: k ^= data[1] <<  8;
            case 1: k ^= data[0] <<  0;
                    k *= 0xcc9e2d51;
                    k = (k << 15) | (k >> 17);
                    k *= 0x1b873593;
                    hash ^= k;
        }

        hash ^= original_bytes;
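        // SkChecksum::Mix() supplies the final avalanche step (in Skia this is a Murmur3-style
        // fmix32 of xor-shifts and multiplies; see SkChecksum.h for the actual definition).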
        return SkChecksum::Mix(hash);
    }
#endif
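
// Usage sketch (assumption): like other headers in src/opts/, this file is compiled once per
// instruction-set level, and SkOpts is expected to install the best available hash_fn in its
// function-pointer table at startup, so callers typically go through SkOpts (e.g.
// SkOpts::hash_fn(data, bytes, seed)) rather than calling SK_OPTS_NS::hash_fn() directly.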

}  // namespace SK_OPTS_NS

#endif//SkChecksum_opts_DEFINED