/* insert_string.h
 *
 * Copyright 2019 The Chromium Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the Chromium source repository LICENSE file.
 */
/* INLINE: spelling of the inline specifier for the current compiler.
 * MSVC gets its `__inline` keyword; every other compiler gets plain `inline`.
 */
#if defined(_MSC_VER)
#  define INLINE __inline
#else
#  define INLINE inline
#endif
12
/* Optimized insert_string block */
#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32)
#include <string.h> /* memcpy: alignment-safe load from the window buffer */

/* Function attribute placed in front of insert_string_optimized(). It starts
 * out empty and is replaced below with a per-function target attribute where
 * the compiler supports one.
 */
#define TARGET_CPU_WITH_CRC
// clang-format off
#if defined(CRC32_SIMD_SSE42_PCLMUL)
  /* Required to make MSVC bot build pass. */
  #include <smmintrin.h>
  #if defined(__GNUC__) || defined(__clang__)
    #undef TARGET_CPU_WITH_CRC
    #define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2")))
  #endif

  #define _cpu_crc32_u32 _mm_crc32_u32

#elif defined(CRC32_ARMV8_CRC32)
  #include "arm_features.h"
  #if defined(__clang__)
    #define __crc32cw __builtin_arm_crc32cw
  #endif

  #define _cpu_crc32_u32 __crc32cw

  /* Always drop the empty default before redefining it: redefining a macro
   * with a different replacement list is a constraint violation, and the
   * #undef used to be performed for clang only.
   */
  #undef TARGET_CPU_WITH_CRC
  #if defined(__aarch64__)
    #define TARGET_CPU_WITH_CRC __attribute__((target("crc")))
  #else  // !defined(__aarch64__)
    #define TARGET_CPU_WITH_CRC __attribute__((target("armv8-a,crc")))
  #endif  // defined(__aarch64__)
#endif
// clang-format on

/* ===========================================================================
 * Insert string `str` into the hash table using the CPU's CRC32 instruction
 * as the hash function.
 *
 *   s   - deflate state; its window, head, prev, level, hash_mask and w_mask
 *         members are used.
 *   str - position in s->window of the string to insert.
 *
 * Reads sizeof(unsigned) bytes starting at s->window[str], hashes them,
 * stores `str` as the new head of the selected bucket and records the
 * previous head in s->prev[str & s->w_mask].
 *
 * Returns the previous head of that bucket.
 */
TARGET_CPU_WITH_CRC
local INLINE Pos insert_string_optimized(deflate_state* const s,
                                         const Pos str) {
  Pos ret;
  unsigned val, h = 0;

  /* `str` is an arbitrary byte offset, so &s->window[str] is not guaranteed
   * to be aligned for `unsigned`, and the window is a byte buffer. Copy the
   * bytes out with memcpy instead of dereferencing a casted pointer; this
   * yields the same native-endian value without the misaligned access or the
   * aliasing violation.
   */
  memcpy(&val, &s->window[str], sizeof(val));

  /* From level 6 upwards only the low 24 bits of the loaded word take part
   * in the hash.
   */
  if (s->level >= 6)
    val &= 0xFFFFFF;

  /* Unlike the case of data integrity checks for GZIP format where the
   * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11),
   * here it is just a hash function for the hash table used while
   * performing compression.
   */
  h = _cpu_crc32_u32(h, val);

  ret = s->head[h & s->hash_mask];
  s->head[h & s->hash_mask] = str;
  s->prev[str & s->w_mask] = ret;
  return ret;
}
#endif /* Optimized insert_string block */
68
/* ===========================================================================
 * Update a hash value with the given input byte.
 *
 *   s - pointer to a structure providing hash_shift and hash_mask.
 *   h - lvalue holding the running hash; it is updated in place and the new
 *       value is also the value of the whole expression.
 *   c - the next input byte.
 *
 * IN assertion: all calls to UPDATE_HASH are made with consecutive input
 *    characters, so that a running hash key can be computed from the previous
 *    key instead of complete recalculation each time.
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as `&state` or `*pp` can be passed for `s`. Note that `s` and `h` are each
 * evaluated twice, so they must not have side effects.
 */
#define UPDATE_HASH(s, h, c) \
  ((h) = ((((h) << (s)->hash_shift) ^ (c)) & (s)->hash_mask))
76
77 /* ===========================================================================
78 * Insert string str in the dictionary and set match_head to the previous head
79 * of the hash chain (the most recent string with same hash key). Return
80 * the previous length of the hash chain.
81 * If this file is compiled with -DFASTEST, the compression level is forced
82 * to 1, and no hash chains are maintained.
83 * IN assertion: all calls to INSERT_STRING are made with consecutive input
84 * characters and the first MIN_MATCH bytes of str are valid (except for
85 * the last MIN_MATCH-1 bytes of the input file).
86 */
insert_string_c(deflate_state * const s,const Pos str)87 local INLINE Pos insert_string_c(deflate_state* const s, const Pos str) {
88 Pos ret;
89
90 UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH - 1)]);
91 #ifdef FASTEST
92 ret = s->head[s->ins_h];
93 #else
94 ret = s->prev[str & s->w_mask] = s->head[s->ins_h];
95 #endif
96 s->head[s->ins_h] = str;
97
98 return ret;
99 }
100
insert_string(deflate_state * const s,const Pos str)101 local INLINE Pos insert_string(deflate_state* const s, const Pos str) {
102 /* String dictionary insertion: faster symbol hashing has a positive impact
103 * on data compression speeds (around 20% on Intel and 36% on Arm Cortex big
104 * cores).
105 * A misfeature is that the generated compressed output will differ from
106 * vanilla zlib (even though it is still valid 'DEFLATE-d' content).
107 *
108 * We offer here a way to disable the optimization if there is the expectation
109 * that compressed content should match when compared to vanilla zlib.
110 */
111 #if !defined(CHROMIUM_ZLIB_NO_CASTAGNOLI)
112 /* TODO(cavalcantii): unify CPU features code. */
113 #if defined(CRC32_ARMV8_CRC32)
114 if (arm_cpu_enable_crc32)
115 return insert_string_optimized(s, str);
116 #elif defined(CRC32_SIMD_SSE42_PCLMUL)
117 if (x86_cpu_enable_simd)
118 return insert_string_optimized(s, str);
119 #endif
120 #endif
121 return insert_string_c(s, str);
122 }
123