1module xxhash where 2 3/** 4 * The 32-bit variant of xxHash. The first argument is the sequence 5 * of L bytes to hash. The second argument is a seed value. 6 */ 7XXH32 : {L} (fin L) => [L][8] -> [32] -> [32] 8XXH32 input seed = XXH32_avalanche acc1 9 where (stripes16 # stripes4 # stripes1) = input 10 accR = foldl XXH32_rounds (XXH32_init seed) (split stripes16 : [L/16][16][8]) 11 accL = `(L % 2^^32) + if (`L:Integer) < 16 12 then seed + PRIME32_5 13 else XXH32_converge accR 14 acc4 = foldl XXH32_digest4 accL (split stripes4 : [(L%16)/4][4][8]) 15 acc1 = foldl XXH32_digest1 acc4 (stripes1 : [L%4][8]) 16 17/** 18 * The 64-bit variant of xxHash. The first argument is the sequence 19 * of L bytes to hash. The second argument is a seed value. 20 */ 21XXH64 : {L} (fin L) => [L][8] -> [64] -> [64] 22XXH64 input seed = XXH64_avalanche acc1 23 where (stripes32 # stripes8 # stripes4 # stripes1) = input 24 accR = foldl XXH64_rounds (XXH64_init seed) (split stripes32 : [L/32][32][8]) 25 accL = `(L % 2^^64) + if (`L:Integer) < 32 26 then seed + PRIME64_5 27 else XXH64_converge accR 28 acc8 = foldl XXH64_digest8 accL (split stripes8 : [(L%32)/8][8][8]) 29 acc4 = foldl XXH64_digest4 acc8 (split stripes4 : [(L%8)/4][4][8]) 30 acc1 = foldl XXH64_digest1 acc4 (stripes1 : [L%4][8]) 31 32private 33 34 //Utility functions 35 36 /** 37 * Combines a sequence of bytes into a word using the little-endian 38 * convention. 39 */ 40 toLE bytes = join (reverse bytes) 41 42 //32-bit xxHash helper functions 43 44 //32-bit prime number constants 45 PRIME32_1 = 0x9E3779B1 : [32] 46 PRIME32_2 = 0x85EBCA77 : [32] 47 PRIME32_3 = 0xC2B2AE3D : [32] 48 PRIME32_4 = 0x27D4EB2F : [32] 49 PRIME32_5 = 0x165667B1 : [32] 50 51 /** 52 * The property shows that the hexadecimal representation of the 53 * PRIME32 constants is the same as the binary representation. 54 */ 55 property PRIME32s_as_bits_correct = 56 (PRIME32_1 == 0b10011110001101110111100110110001) /\ 57 (PRIME32_2 == 0b10000101111010111100101001110111) /\ 58 (PRIME32_3 == 0b11000010101100101010111000111101) /\ 59 (PRIME32_4 == 0b00100111110101001110101100101111) /\ 60 (PRIME32_5 == 0b00010110010101100110011110110001) 61 62 /** 63 * This function initializes the four internal accumulators of XXH32. 64 */ 65 XXH32_init : [32] -> [4][32] 66 XXH32_init seed = [acc1, acc2, acc3, acc4] 67 where acc1 = seed + PRIME32_1 + PRIME32_2 68 acc2 = seed + PRIME32_2 69 acc3 = seed + 0 70 acc4 = seed - PRIME32_1 71 72 /** 73 * This processes a single lane of the main round function of XXH32. 74 */ 75 XXH32_round : [32] -> [32] -> [32] 76 XXH32_round accN laneN = ((accN + laneN * PRIME32_2) <<< 13) * PRIME32_1 77 78 /** 79 * This is the main round function of XXH32 and processes a stripe, 80 * i.e. 4 lanes with 4 bytes each. 81 */ 82 XXH32_rounds : [4][32] -> [16][8] -> [4][32] 83 XXH32_rounds accs stripe = 84 [ XXH32_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ] 85 86 /** 87 * This function combines the four lane accumulators into a single 88 * 32-bit value. 89 */ 90 XXH32_converge : [4][32] -> [32] 91 XXH32_converge [acc1, acc2, acc3, acc4] = 92 (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18) 93 94 /** 95 * This function digests a four byte lane 96 */ 97 XXH32_digest4 : [32] -> [4][8] -> [32] 98 XXH32_digest4 acc lane = ((acc + toLE lane * PRIME32_3) <<< 17) * PRIME32_4 99 100 /** 101 * This function digests a single byte lane 102 */ 103 XXH32_digest1 : [32] -> [8] -> [32] 104 XXH32_digest1 acc lane = ((acc + (0 # lane) * PRIME32_5) <<< 11) * PRIME32_1 105 106 /** 107 * This function ensures that all input bits have a chance to impact 108 * any bit in the output digest, resulting in an unbiased 109 * distribution. 110 */ 111 XXH32_avalanche : [32] -> [32] 112 XXH32_avalanche acc0 = acc5 113 where acc1 = acc0 ^ (acc0 >> 15) 114 acc2 = acc1 * PRIME32_2 115 acc3 = acc2 ^ (acc2 >> 13) 116 acc4 = acc3 * PRIME32_3 117 acc5 = acc4 ^ (acc4 >> 16) 118 119 //64-bit xxHash helper functions 120 121 //64-bit prime number constants 122 PRIME64_1 = 0x9E3779B185EBCA87 : [64] 123 PRIME64_2 = 0xC2B2AE3D27D4EB4F : [64] 124 PRIME64_3 = 0x165667B19E3779F9 : [64] 125 PRIME64_4 = 0x85EBCA77C2B2AE63 : [64] 126 PRIME64_5 = 0x27D4EB2F165667C5 : [64] 127 128 /** 129 * The property shows that the hexadecimal representation of the 130 * PRIME64 constants is the same as the binary representation. 131 */ 132 property PRIME64s_as_bits_correct = 133 (PRIME64_1 == 0b1001111000110111011110011011000110000101111010111100101010000111) /\ 134 (PRIME64_2 == 0b1100001010110010101011100011110100100111110101001110101101001111) /\ 135 (PRIME64_3 == 0b0001011001010110011001111011000110011110001101110111100111111001) /\ 136 (PRIME64_4 == 0b1000010111101011110010100111011111000010101100101010111001100011) /\ 137 (PRIME64_5 == 0b0010011111010100111010110010111100010110010101100110011111000101) 138 139 /** 140 * This function initializes the four internal accumulators of XXH64. 141 */ 142 XXH64_init : [64] -> [4][64] 143 XXH64_init seed = [acc1, acc2, acc3, acc4] 144 where acc1 = seed + PRIME64_1 + PRIME64_2 145 acc2 = seed + PRIME64_2 146 acc3 = seed + 0 147 acc4 = seed - PRIME64_1 148 149 /** 150 * This processes a single lane of the main round function of XXH64. 151 */ 152 XXH64_round : [64] -> [64] -> [64] 153 XXH64_round accN laneN = ((accN + laneN * PRIME64_2) <<< 31) * PRIME64_1 154 155 /** 156 * This is the main round function of XXH64 and processes a stripe, 157 * i.e. 4 lanes with 8 bytes each. 158 */ 159 XXH64_rounds : [4][64] -> [32][8] -> [4][64] 160 XXH64_rounds accs stripe = 161 [ XXH64_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ] 162 163 /** 164 * This is a helper function, used to merge the four lane accumulators. 165 */ 166 mergeAccumulator : [64] -> [64] -> [64] 167 mergeAccumulator acc accN = (acc ^ XXH64_round 0 accN) * PRIME64_1 + PRIME64_4 168 169 /** 170 * This function combines the four lane accumulators into a single 171 * 64-bit value. 172 */ 173 XXH64_converge : [4][64] -> [64] 174 XXH64_converge [acc1, acc2, acc3, acc4] = 175 foldl mergeAccumulator ((acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)) [acc1, acc2, acc3, acc4] 176 177 /** 178 * This function digests an eight byte lane 179 */ 180 XXH64_digest8 : [64] -> [8][8] -> [64] 181 XXH64_digest8 acc lane = ((acc ^ XXH64_round 0 (toLE lane)) <<< 27) * PRIME64_1 + PRIME64_4 182 183 /** 184 * This function digests a four byte lane 185 */ 186 XXH64_digest4 : [64] -> [4][8] -> [64] 187 XXH64_digest4 acc lane = ((acc ^ (0 # toLE lane) * PRIME64_1) <<< 23) * PRIME64_2 + PRIME64_3 188 189 /** 190 * This function digests a single byte lane 191 */ 192 XXH64_digest1 : [64] -> [8] -> [64] 193 XXH64_digest1 acc lane = ((acc ^ (0 # lane) * PRIME64_5) <<< 11) * PRIME64_1 194 195 /** 196 * This function ensures that all input bits have a chance to impact 197 * any bit in the output digest, resulting in an unbiased 198 * distribution. 199 */ 200 XXH64_avalanche : [64] -> [64] 201 XXH64_avalanche acc0 = acc5 202 where acc1 = acc0 ^ (acc0 >> 33) 203 acc2 = acc1 * PRIME64_2 204 acc3 = acc2 ^ (acc2 >> 29) 205 acc4 = acc3 * PRIME64_3 206 acc5 = acc4 ^ (acc4 >> 32) 207