• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdlib.h>
2 #include <inttypes.h>
3 
4 #include "bloom.h"
5 #include "../hash.h"
6 #include "../minmax.h"
7 #include "../crc/xxhash.h"
8 #include "../crc/murmur3.h"
9 #include "../crc/crc32c.h"
10 #include "../crc/fnv.h"
11 
/*
 * A bloom filter instance: a bit map stored as an array of 32-bit words,
 * plus the number of bit positions hashes are reduced into.
 */
struct bloom {
	uint64_t nentries;	/* hash values are taken modulo this count */
	uint32_t *map;		/* heap-allocated bit map, one bit per entry */
};
17 
/* Number of filter bits held by one uint32_t word of the map. */
#define BITS_PER_INDEX	(8 * sizeof(uint32_t))
/* Mask that extracts the bit position inside a single map word. */
#define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)
20 
/*
 * One hash function usable by the filter, together with the seed that is
 * passed to it as its third argument.
 */
struct bloom_hash {
	unsigned int seed;
	/* Hashes 'len' bytes at 'buf' into a 32-bit value. */
	uint32_t (*fn)(const void *buf, uint32_t len, uint32_t seed);
};
25 
/*
 * Adapter that gives fio_crc32c() the (buf, len, seed) signature used by
 * the hash table. The seed is accepted but not used.
 */
static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed)
{
	(void) seed;

	return fio_crc32c(buf, len);
}
30 
/*
 * Adapter that exposes fnv() through the (buf, len, seed) signature used by
 * the hash table, returning the result as a 32-bit value.
 */
static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed)
{
	const uint32_t digest = fnv(buf, len, seed);

	return digest;
}
35 
36 #define BLOOM_SEED	0x8989
37 
38 static struct bloom_hash hashes[] = {
39 	{
40 		.seed = BLOOM_SEED,
41 		.fn = jhash,
42 	},
43 	{
44 		.seed = BLOOM_SEED,
45 		.fn = XXH32,
46 	},
47 	{
48 		.seed = BLOOM_SEED,
49 		.fn = murmurhash3,
50 	},
51 	{
52 		.seed = BLOOM_SEED,
53 		.fn = bloom_crc32c,
54 	},
55 	{
56 		.seed = BLOOM_SEED,
57 		.fn = bloom_fnv,
58 	},
59 };
60 
/*
 * Number of hash functions applied per item. Derived from the hashes[]
 * table so the two can never get out of sync; the cast keeps it an int
 * constant expression, usable both as an array size and in comparisons
 * against int counters.
 */
#define N_HASHES	((int) (sizeof(hashes) / sizeof(hashes[0])))
62 
bloom_new(uint64_t entries)63 struct bloom *bloom_new(uint64_t entries)
64 {
65 	struct bloom *b;
66 	size_t no_uints;
67 
68 	crc32c_arm64_probe();
69 	crc32c_intel_probe();
70 
71 	b = malloc(sizeof(*b));
72 	b->nentries = entries;
73 	no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
74 	b->map = calloc(no_uints, sizeof(uint32_t));
75 	if (!b->map) {
76 		free(b);
77 		return NULL;
78 	}
79 
80 	return b;
81 }
82 
bloom_free(struct bloom * b)83 void bloom_free(struct bloom *b)
84 {
85 	free(b->map);
86 	free(b);
87 }
88 
__bloom_check(struct bloom * b,const void * data,unsigned int len,bool set)89 static bool __bloom_check(struct bloom *b, const void *data, unsigned int len,
90 			  bool set)
91 {
92 	uint32_t hash[N_HASHES];
93 	int i, was_set;
94 
95 	for (i = 0; i < N_HASHES; i++) {
96 		hash[i] = hashes[i].fn(data, len, hashes[i].seed);
97 		hash[i] = hash[i] % b->nentries;
98 	}
99 
100 	was_set = 0;
101 	for (i = 0; i < N_HASHES; i++) {
102 		const unsigned int index = hash[i] / BITS_PER_INDEX;
103 		const unsigned int bit = hash[i] & BITS_INDEX_MASK;
104 
105 		if (b->map[index] & (1U << bit))
106 			was_set++;
107 		else if (set)
108 			b->map[index] |= 1U << bit;
109 		else
110 			break;
111 	}
112 
113 	return was_set == N_HASHES;
114 }
115 
/*
 * Insert an item made of 'nwords' 32-bit words into the filter.
 *
 * Returns true if every bit selected for the item was already set before
 * the call, false otherwise.
 */
bool bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	const unsigned int nbytes = nwords * sizeof(uint32_t);

	return __bloom_check(b, data, nbytes, true);
}
120 
/*
 * Look up a byte string of 'len' bytes in the filter, inserting it as well
 * when 'set' is true.
 *
 * Returns true if every bit selected for the string was already set before
 * the call, false otherwise.
 */
bool bloom_string(struct bloom *b, const char *data, unsigned int len,
		  bool set)
{
	const bool all_present = __bloom_check(b, data, len, set);

	return all_present;
}
126