• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3  * Copyright (C) 2022 Alibaba Cloud
4  */
5 #include "erofs/dedupe.h"
6 #include "erofs/print.h"
7 #include "rb_tree.h"
8 #include "rolling_hash.h"
9 #include "sha256.h"
10 
erofs_memcmp2(const u8 * s1,const u8 * s2,unsigned long sz)11 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
12 			    unsigned long sz)
13 {
14 	const unsigned long *a1, *a2;
15 	unsigned long n = sz;
16 
17 	if (sz < sizeof(long))
18 		goto out_bytes;
19 
20 	if (((long)s1 & (sizeof(long) - 1)) ==
21 			((long)s2 & (sizeof(long) - 1))) {
22 		while ((long)s1 & (sizeof(long) - 1)) {
23 			if (*s1 != *s2)
24 				break;
25 			++s1;
26 			++s2;
27 			--sz;
28 		}
29 
30 		a1 = (const unsigned long *)s1;
31 		a2 = (const unsigned long *)s2;
32 		while (sz >= sizeof(long)) {
33 			if (*a1 != *a2)
34 				break;
35 			++a1;
36 			++a2;
37 			sz -= sizeof(long);
38 		}
39 	} else {
40 		a1 = (const unsigned long *)s1;
41 		a2 = (const unsigned long *)s2;
42 		do {
43 			if (get_unaligned(a1) != get_unaligned(a2))
44 				break;
45 			++a1;
46 			++a2;
47 			sz -= sizeof(long);
48 		} while (sz >= sizeof(long));
49 	}
50 	s1 = (const u8 *)a1;
51 	s2 = (const u8 *)a2;
52 out_bytes:
53 	while (sz) {
54 		if (*s1 != *s2)
55 			break;
56 		++s1;
57 		++s2;
58 		--sz;
59 	}
60 	return n - sz;
61 }
62 
63 static unsigned int window_size, rollinghash_rm;
64 static struct rb_tree *dedupe_tree, *dedupe_subtree;
65 
66 struct z_erofs_dedupe_item {
67 	long long	hash;
68 	u8		prefix_sha256[32];
69 
70 	erofs_blk_t	compressed_blkaddr;
71 	unsigned int	compressed_blks;
72 
73 	int		original_length;
74 	bool		partial, raw;
75 	u8		extra_data[];
76 };
77 
z_erofs_dedupe_rbtree_cmp(struct rb_tree * self,struct rb_node * node_a,struct rb_node * node_b)78 static int z_erofs_dedupe_rbtree_cmp(struct rb_tree *self,
79 		struct rb_node *node_a, struct rb_node *node_b)
80 {
81 	struct z_erofs_dedupe_item *e_a = node_a->value;
82 	struct z_erofs_dedupe_item *e_b = node_b->value;
83 
84 	return (e_a->hash > e_b->hash) - (e_a->hash < e_b->hash);
85 }
86 
z_erofs_dedupe_match(struct z_erofs_dedupe_ctx * ctx)87 int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
88 {
89 	struct z_erofs_dedupe_item e_find;
90 	u8 *cur;
91 	bool initial = true;
92 
93 	if (!dedupe_tree)
94 		return -ENOENT;
95 
96 	if (ctx->cur > ctx->end - window_size)
97 		cur = ctx->end - window_size;
98 	else
99 		cur = ctx->cur;
100 
101 	/* move backward byte-by-byte */
102 	for (; cur >= ctx->start; --cur) {
103 		struct z_erofs_dedupe_item *e;
104 		unsigned int extra;
105 		u8 sha256[32];
106 
107 		if (initial) {
108 			/* initial try */
109 			e_find.hash = erofs_rolling_hash_init(cur, window_size, true);
110 			initial = false;
111 		} else {
112 			e_find.hash = erofs_rolling_hash_advance(e_find.hash,
113 				rollinghash_rm, cur[window_size], cur[0]);
114 		}
115 
116 		e = rb_tree_find(dedupe_tree, &e_find);
117 		if (!e) {
118 			e = rb_tree_find(dedupe_subtree, &e_find);
119 			if (!e)
120 				continue;
121 		}
122 
123 		erofs_sha256(cur, window_size, sha256);
124 		if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
125 			continue;
126 
127 		extra = min_t(unsigned int, ctx->end - cur - window_size,
128 			      e->original_length - window_size);
129 		extra = erofs_memcmp2(cur + window_size, e->extra_data, extra);
130 		if (window_size + extra <= ctx->cur - cur)
131 			continue;
132 		ctx->cur = cur;
133 		ctx->e.length = window_size + extra;
134 		ctx->e.partial = e->partial ||
135 			(window_size + extra < e->original_length);
136 		ctx->e.raw = e->raw;
137 		ctx->e.blkaddr = e->compressed_blkaddr;
138 		ctx->e.compressedblks = e->compressed_blks;
139 		return 0;
140 	}
141 	return -ENOENT;
142 }
143 
z_erofs_dedupe_insert(struct z_erofs_inmem_extent * e,void * original_data)144 int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
145 			  void *original_data)
146 {
147 	struct z_erofs_dedupe_item *di;
148 
149 	if (!dedupe_subtree || e->length < window_size)
150 		return 0;
151 
152 	di = malloc(sizeof(*di) + e->length - window_size);
153 	if (!di)
154 		return -ENOMEM;
155 
156 	di->original_length = e->length;
157 	erofs_sha256(original_data, window_size, di->prefix_sha256);
158 	di->hash = erofs_rolling_hash_init(original_data,
159 			window_size, true);
160 	memcpy(di->extra_data, original_data + window_size,
161 	       e->length - window_size);
162 	di->compressed_blkaddr = e->blkaddr;
163 	di->compressed_blks = e->compressedblks;
164 	di->partial = e->partial;
165 	di->raw = e->raw;
166 
167 	/* with the same rolling hash */
168 	if (!rb_tree_insert(dedupe_subtree, di))
169 		free(di);
170 	return 0;
171 }
172 
z_erofs_dedupe_node_free_cb(struct rb_tree * self,struct rb_node * node)173 static void z_erofs_dedupe_node_free_cb(struct rb_tree *self,
174 					struct rb_node *node)
175 {
176 	free(node->value);
177 	rb_tree_node_dealloc_cb(self, node);
178 }
179 
z_erofs_dedupe_commit(bool drop)180 void z_erofs_dedupe_commit(bool drop)
181 {
182 	if (!dedupe_subtree)
183 		return;
184 	if (!drop) {
185 		struct rb_iter iter;
186 		struct z_erofs_dedupe_item *di;
187 
188 		di = rb_iter_first(&iter, dedupe_subtree);
189 		while (di) {
190 			if (!rb_tree_insert(dedupe_tree, di))
191 				DBG_BUGON(1);
192 			di = rb_iter_next(&iter);
193 		}
194 		/*rb_iter_dealloc(iter);*/
195 		rb_tree_dealloc(dedupe_subtree, rb_tree_node_dealloc_cb);
196 	} else {
197 		rb_tree_dealloc(dedupe_subtree, z_erofs_dedupe_node_free_cb);
198 	}
199 	dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
200 }
201 
z_erofs_dedupe_init(unsigned int wsiz)202 int z_erofs_dedupe_init(unsigned int wsiz)
203 {
204 	dedupe_tree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
205 	if (!dedupe_tree)
206 		return -ENOMEM;
207 
208 	dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
209 	if (!dedupe_subtree) {
210 		rb_tree_dealloc(dedupe_subtree, NULL);
211 		return -ENOMEM;
212 	}
213 	window_size = wsiz;
214 	rollinghash_rm = erofs_rollinghash_calc_rm(window_size);
215 	return 0;
216 }
217 
z_erofs_dedupe_exit(void)218 void z_erofs_dedupe_exit(void)
219 {
220 	z_erofs_dedupe_commit(true);
221 	rb_tree_dealloc(dedupe_subtree, NULL);
222 	rb_tree_dealloc(dedupe_tree, z_erofs_dedupe_node_free_cb);
223 }
224