• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1module xxhash where
2
3/**
4 * The 32-bit variant of xxHash. The first argument is the sequence
5 * of L bytes to hash. The second argument is a seed value.
6 */
7XXH32 : {L} (fin L) => [L][8] -> [32] -> [32]
8XXH32 input seed = XXH32_avalanche acc1
9  where (stripes16 # stripes4 # stripes1) = input
10        accR = foldl XXH32_rounds (XXH32_init seed) (split stripes16 : [L/16][16][8])
11        accL = `(L % 2^^32) + if (`L:Integer) < 16
12                              then seed + PRIME32_5
13                              else XXH32_converge accR
14        acc4 = foldl XXH32_digest4 accL (split stripes4 : [(L%16)/4][4][8])
15        acc1 = foldl XXH32_digest1 acc4 (stripes1 : [L%4][8])
16
17/**
18 * The 64-bit variant of xxHash. The first argument is the sequence
19 * of L bytes to hash. The second argument is a seed value.
20 */
21XXH64 : {L} (fin L) => [L][8] -> [64] -> [64]
22XXH64 input seed = XXH64_avalanche acc1
23  where (stripes32 # stripes8 # stripes4 # stripes1) = input
24        accR = foldl XXH64_rounds (XXH64_init seed) (split stripes32 : [L/32][32][8])
25        accL = `(L % 2^^64) + if (`L:Integer) < 32
26                              then seed + PRIME64_5
27                              else XXH64_converge accR
28        acc8 = foldl XXH64_digest8 accL (split stripes8 : [(L%32)/8][8][8])
29        acc4 = foldl XXH64_digest4 acc8 (split stripes4 : [(L%8)/4][4][8])
30        acc1 = foldl XXH64_digest1 acc4 (stripes1 : [L%4][8])
31
32private
33
34  //Utility functions
35
36  /**
37   * Combines a sequence of bytes into a word using the little-endian
38   * convention.
39   */
40  toLE bytes = join (reverse bytes)
41
42  //32-bit xxHash helper functions
43
44  //32-bit prime number constants
45  PRIME32_1 = 0x9E3779B1 : [32]
46  PRIME32_2 = 0x85EBCA77 : [32]
47  PRIME32_3 = 0xC2B2AE3D : [32]
48  PRIME32_4 = 0x27D4EB2F : [32]
49  PRIME32_5 = 0x165667B1 : [32]
50
51  /**
52   * The property shows that the hexadecimal representation of the
53   * PRIME32 constants is the same as the binary representation.
54   */
55  property PRIME32s_as_bits_correct =
56    (PRIME32_1 == 0b10011110001101110111100110110001) /\
57    (PRIME32_2 == 0b10000101111010111100101001110111) /\
58    (PRIME32_3 == 0b11000010101100101010111000111101) /\
59    (PRIME32_4 == 0b00100111110101001110101100101111) /\
60    (PRIME32_5 == 0b00010110010101100110011110110001)
61
62  /**
63   * This function initializes the four internal accumulators of XXH32.
64   */
65  XXH32_init : [32] -> [4][32]
66  XXH32_init seed = [acc1, acc2, acc3, acc4]
67    where acc1 = seed + PRIME32_1 + PRIME32_2
68          acc2 = seed + PRIME32_2
69          acc3 = seed + 0
70          acc4 = seed - PRIME32_1
71
72  /**
73   * This processes a single lane of the main round function of XXH32.
74   */
75  XXH32_round : [32] -> [32] -> [32]
76  XXH32_round accN laneN = ((accN + laneN * PRIME32_2) <<< 13) * PRIME32_1
77
78  /**
79   * This is the main round function of XXH32 and processes a stripe,
80   * i.e. 4 lanes with 4 bytes each.
81   */
82  XXH32_rounds : [4][32] -> [16][8] -> [4][32]
83  XXH32_rounds accs stripe =
84    [ XXH32_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ]
85
86  /**
87   * This function combines the four lane accumulators into a single
88   * 32-bit value.
89   */
90  XXH32_converge : [4][32] -> [32]
91  XXH32_converge [acc1, acc2, acc3, acc4] =
92    (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)
93
94  /**
95   * This function digests a four byte lane
96   */
97  XXH32_digest4 : [32] -> [4][8] -> [32]
98  XXH32_digest4 acc lane = ((acc + toLE lane * PRIME32_3) <<< 17) * PRIME32_4
99
100  /**
101   * This function digests a single byte lane
102   */
103  XXH32_digest1 : [32] -> [8] -> [32]
104  XXH32_digest1 acc lane = ((acc + (0 # lane) * PRIME32_5) <<< 11) * PRIME32_1
105
106  /**
107   * This function ensures that all input bits have a chance to impact
108   * any bit in the output digest, resulting in an unbiased
109   * distribution.
110   */
111  XXH32_avalanche : [32] -> [32]
112  XXH32_avalanche acc0 = acc5
113    where acc1 = acc0 ^ (acc0 >> 15)
114          acc2 = acc1 * PRIME32_2
115          acc3 = acc2 ^ (acc2 >> 13)
116          acc4 = acc3 * PRIME32_3
117          acc5 = acc4 ^ (acc4 >> 16)
118
119  //64-bit xxHash helper functions
120
121  //64-bit prime number constants
122  PRIME64_1 = 0x9E3779B185EBCA87 : [64]
123  PRIME64_2 = 0xC2B2AE3D27D4EB4F : [64]
124  PRIME64_3 = 0x165667B19E3779F9 : [64]
125  PRIME64_4 = 0x85EBCA77C2B2AE63 : [64]
126  PRIME64_5 = 0x27D4EB2F165667C5 : [64]
127
128  /**
129   * The property shows that the hexadecimal representation of the
130   * PRIME64 constants is the same as the binary representation.
131   */
132  property PRIME64s_as_bits_correct =
133    (PRIME64_1 == 0b1001111000110111011110011011000110000101111010111100101010000111) /\
134    (PRIME64_2 == 0b1100001010110010101011100011110100100111110101001110101101001111) /\
135    (PRIME64_3 == 0b0001011001010110011001111011000110011110001101110111100111111001) /\
136    (PRIME64_4 == 0b1000010111101011110010100111011111000010101100101010111001100011) /\
137    (PRIME64_5 == 0b0010011111010100111010110010111100010110010101100110011111000101)
138
139  /**
140   * This function initializes the four internal accumulators of XXH64.
141   */
142  XXH64_init : [64] -> [4][64]
143  XXH64_init seed = [acc1, acc2, acc3, acc4]
144    where acc1 = seed + PRIME64_1 + PRIME64_2
145          acc2 = seed + PRIME64_2
146          acc3 = seed + 0
147          acc4 = seed - PRIME64_1
148
149  /**
150   * This processes a single lane of the main round function of XXH64.
151   */
152  XXH64_round : [64] -> [64] -> [64]
153  XXH64_round accN laneN = ((accN + laneN * PRIME64_2) <<< 31) * PRIME64_1
154
155  /**
156   * This is the main round function of XXH64 and processes a stripe,
157   * i.e. 4 lanes with 8 bytes each.
158   */
159  XXH64_rounds : [4][64] -> [32][8] -> [4][64]
160  XXH64_rounds accs stripe =
161    [ XXH64_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ]
162
163  /**
164   * This is a helper function, used to merge the four lane accumulators.
165   */
166  mergeAccumulator : [64] -> [64] -> [64]
167  mergeAccumulator acc accN = (acc ^ XXH64_round 0 accN) * PRIME64_1 + PRIME64_4
168
169  /**
170   * This function combines the four lane accumulators into a single
171   * 64-bit value.
172   */
173  XXH64_converge : [4][64] -> [64]
174  XXH64_converge [acc1, acc2, acc3, acc4] =
175    foldl mergeAccumulator ((acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)) [acc1, acc2, acc3, acc4]
176
177  /**
178   * This function digests an eight byte lane
179   */
180  XXH64_digest8 : [64] -> [8][8] -> [64]
181  XXH64_digest8 acc lane = ((acc ^ XXH64_round 0 (toLE lane)) <<< 27) * PRIME64_1 + PRIME64_4
182
183  /**
184   * This function digests a four byte lane
185   */
186  XXH64_digest4 : [64] -> [4][8] -> [64]
187  XXH64_digest4 acc lane = ((acc ^ (0 # toLE lane) * PRIME64_1) <<< 23) * PRIME64_2 + PRIME64_3
188
189  /**
190   * This function digests a single byte lane
191   */
192  XXH64_digest1 : [64] -> [8] -> [64]
193  XXH64_digest1 acc lane = ((acc ^ (0 # lane) * PRIME64_5) <<< 11) * PRIME64_1
194
195  /**
196   * This function ensures that all input bits have a chance to impact
197   * any bit in the output digest, resulting in an unbiased
198   * distribution.
199   */
200  XXH64_avalanche : [64] -> [64]
201  XXH64_avalanche acc0 = acc5
202    where acc1 = acc0 ^ (acc0 >> 33)
203          acc2 = acc1 * PRIME64_2
204          acc3 = acc2 ^ (acc2 >> 29)
205          acc4 = acc3 * PRIME64_3
206          acc5 = acc4 ^ (acc4 >> 32)
207