1 /**************************************************************************************************
2 * IOWOW library
3 *
4 * MIT License
5 *
6 * Copyright (c) 2012-2022 Softmotions Ltd <info@softmotions.com>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in all
16 * copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *************************************************************************************************/
26
27 #include "iwfsmfile.h"
28 #include "iwavl.h"
29 #include "iwbits.h"
30 #include "iwlog.h"
31 #include "iwp.h"
32 #include "iwutils.h"
33 #include "iwcfg.h"
34
35 #include <pthread.h>
36
37 void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr);
38
39 /**
40 * Free-space blocks-tree key.
41 */
42 struct bkey {
43 uint32_t off;
44 uint32_t len;
45 };
46
/** Heap-allocated AVL tree entry owning one free-space chunk key. */
struct bkey_node {
  struct bkey key;        /**< Free chunk (offset, length) in blocks. */
  struct iwavl_node node; /**< Intrusive AVL node linked into `fsm->root`. */
};
51
/** Access the `struct bkey` embedded in an `iwavl_node` (intrusive container access). */
#define BKEY(nptr_) iwavl_entry(nptr_, struct bkey_node, node)->key

/** Additional options for `_fsm_set_bit_status_lw` routine */
typedef uint8_t fsm_bmopts_t;

/** No options. */
#define FSM_BM_NONE ((fsm_bmopts_t) 0x00U)

/** Do not modify bitmap. */
#define FSM_BM_DRY_RUN ((fsm_bmopts_t) 0x01U)

/** Perform strict checking of bitmap consistency */
#define FSM_BM_STRICT ((fsm_bmopts_t) 0x02U)

/* Maximum size of block: 1Mb */
#define FSM_MAX_BLOCK_POW 20

/* Maximum number of records used in allocation statistics */
#define FSM_MAX_STATS_COUNT 0x0000ffff

/* Guard: fail unless the fsm implementation is attached to an open file. */
#define FSM_ENSURE_OPEN(impl_) \
  if (!(impl_) || !(impl_)->f) return IW_ERROR_INVALID_STATE;

/* Guard: fail unless the public interface has an implementation bound. */
#define FSM_ENSURE_OPEN2(f_) \
  if (!(f_) || !(f_)->impl) return IW_ERROR_INVALID_STATE;

/** Offset (in blocks) of a free-chunk key. */
#define FSMBK_OFFSET(b_) ((b_)->off)

/** Length (in blocks) of a free-chunk key. */
#define FSMBK_LENGTH(b_) ((b_)->len)
81
82 ////////////////////////////////////////////////////////////////////////////////////////////////////
83
/** Free-space manager state backing one `IWFS_FSM` file instance. */
struct fsm {
  IWFS_EXT pool;                   /**< Underlying rwl file. */
  uint64_t bmlen;                  /**< Free-space bitmap block length in bytes. */
  uint64_t bmoff;                  /**< Free-space bitmap block offset in bytes. */
  uint64_t lfbkoff;                /**< Offset in blocks of free block chunk with the largest offset. */
  uint64_t lfbklen;                /**< Length in blocks of free block chunk with the largest offset. */
  uint64_t crzsum;                 /**< Cumulative sum all allocated blocks */
  uint64_t crzvar;                 /**< Record sizes standard variance (deviation^2 * N) */
  uint32_t hdrlen;                 /**< Length of custom file header */
  uint32_t crznum;                 /**< Number of all allocated continuous areas acquired by `allocate` */
  uint32_t fsmnum;                 /**< Number of records in fsm */
  IWFS_FSM *f;                     /**< Self reference. */
  IWDLSNR *dlsnr;                  /**< Data events listener */
  struct iwavl_node *root;         /**< Free-space tree */
  pthread_rwlock_t *ctlrwlk;       /**< Methods RW lock */
  size_t aunit;                    /**< System allocation unit size.
                                        - Page size on *NIX
                                        - Minimal allocation unit for WIN32 */
  iwfs_fsm_openflags oflags;       /**< Operation mode flags. */
  iwfs_omode omode;                /**< Open mode. */
  uint8_t bpow;                    /**< Block size power for 2 */
  bool mmap_all;                   /**< Mmap all file data */
  iwfs_ext_mmap_opts_t mmap_opts;  /**< Default mmap options used in `add_mmap` */
};
108
109 static iwrc _fsm_ensure_size_lw(struct fsm *fsm, off_t size);
110
111 ////////////////////////////////////////////////////////////////////////////////////////////////////
112
_fsm_cmp_key(const struct bkey * a,const struct bkey * b)113 IW_INLINE int _fsm_cmp_key(const struct bkey *a, const struct bkey *b) {
114 int ret = ((FSMBK_LENGTH(b) < FSMBK_LENGTH(a)) - (FSMBK_LENGTH(a) < FSMBK_LENGTH(b)));
115 if (ret) {
116 return ret;
117 } else {
118 return ((FSMBK_OFFSET(b) < FSMBK_OFFSET(a)) - (FSMBK_OFFSET(a) < FSMBK_OFFSET(b)));
119 }
120 }
121
_fsm_cmp_node(const struct iwavl_node * an,const struct iwavl_node * bn)122 IW_INLINE int _fsm_cmp_node(const struct iwavl_node *an, const struct iwavl_node *bn) {
123 const struct bkey *ak = &BKEY(an);
124 const struct bkey *bk = &BKEY(bn);
125 return _fsm_cmp_key(ak, bk);
126 }
127
_fsm_cmp_ctx(const void * ctx,const struct iwavl_node * bn)128 IW_INLINE int _fsm_cmp_ctx(const void *ctx, const struct iwavl_node *bn) {
129 const struct bkey *ak = ctx;
130 const struct bkey *bk = &BKEY(bn);
131 return _fsm_cmp_key(ak, bk);
132 }
133
_fsm_ctrl_wlock(struct fsm * fsm)134 IW_INLINE iwrc _fsm_ctrl_wlock(struct fsm *fsm) {
135 int rci = fsm->ctlrwlk ? pthread_rwlock_wrlock(fsm->ctlrwlk) : 0;
136 return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
137 }
138
_fsm_ctrl_rlock(struct fsm * fsm)139 IW_INLINE iwrc _fsm_ctrl_rlock(struct fsm *fsm) {
140 int rci = fsm->ctlrwlk ? pthread_rwlock_rdlock(fsm->ctlrwlk) : 0;
141 return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
142 }
143
_fsm_ctrl_unlock(struct fsm * fsm)144 IW_INLINE iwrc _fsm_ctrl_unlock(struct fsm *fsm) {
145 int rci = fsm->ctlrwlk ? pthread_rwlock_unlock(fsm->ctlrwlk) : 0;
146 return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
147 }
148
_fsm_bmptr(struct fsm * fsm,uint64_t ** bmptr)149 IW_INLINE iwrc _fsm_bmptr(struct fsm *fsm, uint64_t **bmptr) {
150 size_t sp;
151 uint8_t *mm;
152 *bmptr = 0;
153 // get mmap pointer without locked
154 iwrc rc = fsm->pool.probe_mmap(&fsm->pool, fsm->mmap_all ? 0 : fsm->bmoff, &mm, &sp);
155 RCRET(rc);
156 if (fsm->mmap_all) {
157 if (sp < fsm->bmoff + fsm->bmlen) {
158 return IWFS_ERROR_NOT_MMAPED;
159 }
160 *bmptr = (uint64_t*) (mm + fsm->bmoff);
161 } else {
162 if (sp < fsm->bmlen) {
163 return IWFS_ERROR_NOT_MMAPED;
164 }
165 *bmptr = (uint64_t*) mm;
166 }
167 return 0;
168 }
169
_fsm_init_bkey_node(struct bkey_node * n,uint64_t offset_blk,uint64_t len_blk)170 IW_INLINE WUR iwrc _fsm_init_bkey_node(struct bkey_node *n, uint64_t offset_blk, uint64_t len_blk) {
171 if (offset_blk > (uint32_t) -1 || len_blk > (uint32_t) -1) {
172 return IW_ERROR_OVERFLOW;
173 }
174 n->key.off = (uint32_t) offset_blk;
175 n->key.len = (uint32_t) len_blk;
176 return 0;
177 }
178
_fsm_init_bkey(struct bkey * k,uint64_t offset_blk,uint64_t len_blk)179 IW_INLINE iwrc _fsm_init_bkey(struct bkey *k, uint64_t offset_blk, uint64_t len_blk) {
180 if (offset_blk > (uint32_t) -1 || len_blk > (uint32_t) -1) {
181 return IW_ERROR_OVERFLOW;
182 }
183 k->off = (uint32_t) offset_blk;
184 k->len = (uint32_t) len_blk;
185 return 0;
186 }
187
_fsm_del_fbk2(struct fsm * fsm,struct iwavl_node * n)188 IW_INLINE void _fsm_del_fbk2(struct fsm *fsm, struct iwavl_node *n) {
189 iwavl_remove(&fsm->root, n), --fsm->fsmnum;
190 struct bkey_node *bk = iwavl_entry(n, struct bkey_node, node);
191 if (bk->key.off == fsm->lfbkoff) {
192 fsm->lfbkoff = 0;
193 fsm->lfbklen = 0;
194 }
195 free(bk);
196 }
197
_fsm_del_fbk(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk)198 IW_INLINE void _fsm_del_fbk(struct fsm *fsm, uint64_t offset_blk, uint64_t length_blk) {
199 struct bkey bkey;
200 if (!_fsm_init_bkey(&bkey, offset_blk, length_blk)) {
201 struct iwavl_node *n = iwavl_lookup(fsm->root, &bkey, _fsm_cmp_ctx);
202 assert(n);
203 if (n) {
204 _fsm_del_fbk2(fsm, n);
205 }
206 }
207 }
208
_fsm_put_fbk(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk)209 IW_INLINE iwrc _fsm_put_fbk(struct fsm *fsm, uint64_t offset_blk, uint64_t length_blk) {
210 iwrc rc = 0;
211 struct bkey_node *bk;
212 RCB(finish, bk = malloc(sizeof(*bk)));
213 RCC(rc, finish, _fsm_init_bkey_node(bk, offset_blk, length_blk));
214 if (iwavl_insert(&fsm->root, &bk->node, _fsm_cmp_node)) {
215 free(bk);
216 } else {
217 ++fsm->fsmnum;
218 if (offset_blk + length_blk >= fsm->lfbkoff + fsm->lfbklen) {
219 fsm->lfbkoff = offset_blk;
220 fsm->lfbklen = length_blk;
221 }
222 }
223 finish:
224 if (rc) {
225 free(bk);
226 }
227 return rc;
228 }
229
_fsm_find_matching_fblock_lw(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk,iwfs_fsm_aflags opts)230 IW_INLINE const struct iwavl_node* _fsm_find_matching_fblock_lw(
231 struct fsm *fsm,
232 uint64_t offset_blk,
233 uint64_t length_blk,
234 iwfs_fsm_aflags opts
235 ) {
236 struct bkey bk;
237 const struct iwavl_node *ub, *lb;
238 if (_fsm_init_bkey(&bk, offset_blk, length_blk)) {
239 return 0;
240 }
241
242 iwavl_lookup_bounds(fsm->root, &bk, _fsm_cmp_ctx, &lb, &ub);
243
244 struct bkey *uk = ub ? &BKEY(ub) : 0;
245 struct bkey *lk = lb ? &BKEY(lb) : 0;
246
247 uint64_t lklength = lk ? FSMBK_LENGTH(lk) : 0;
248 uint64_t uklength = uk ? FSMBK_LENGTH(uk) : 0;
249
250 if (lklength == length_blk) {
251 return lb;
252 } else if (uklength == length_blk) {
253 return ub;
254 }
255 if (lklength > length_blk) {
256 return lb;
257 } else if (uklength > length_blk) {
258 return ub;
259 }
260 return 0;
261 }
262
263 /**
264 * @brief Set the allocation bits in the fsm bitmap.
265 *
266 * @param fms
267 * @param offset_bits Bit offset in the bitmap.
268 * @param length_bits Number of bits to set
269 * @param bit_status If `1` bits will be set to `1` otherwise `0`
270 * @param opts Operation options
271 */
_fsm_set_bit_status_lw(struct fsm * fsm,const uint64_t offset_bits,const uint64_t length_bits_,const int bit_status,const fsm_bmopts_t opts)272 static iwrc _fsm_set_bit_status_lw(
273 struct fsm *fsm,
274 const uint64_t offset_bits,
275 const uint64_t length_bits_,
276 const int bit_status,
277 const fsm_bmopts_t opts
278 ) {
279 iwrc rc;
280 size_t sp;
281 uint8_t *mm;
282 register int64_t length_bits = length_bits_;
283 register uint64_t *p, set_mask;
284 uint64_t bend = offset_bits + length_bits;
285 int set_bits;
286
287 if (bend < offset_bits) { // overflow
288 return IW_ERROR_OUT_OF_BOUNDS;
289 }
290 assert(fsm->bmlen * 8 >= offset_bits + length_bits);
291 if (fsm->bmlen * 8 < offset_bits + length_bits) {
292 return IWFS_ERROR_FSM_SEGMENTATION;
293 }
294 if (fsm->mmap_all) {
295 rc = fsm->pool.probe_mmap(&fsm->pool, 0, &mm, &sp);
296 RCRET(rc);
297 if (sp < fsm->bmoff + fsm->bmlen) {
298 return IWFS_ERROR_NOT_MMAPED;
299 } else {
300 mm += fsm->bmoff;
301 }
302 } else {
303 rc = fsm->pool.probe_mmap(&fsm->pool, fsm->bmoff, &mm, &sp);
304 RCRET(rc);
305 if (sp < fsm->bmlen) {
306 return IWFS_ERROR_NOT_MMAPED;
307 }
308 }
309 p = ((uint64_t*) mm) + offset_bits / 64;
310 set_bits = 64 - (offset_bits & (64 - 1)); // NOLINT
311 set_mask = (~((uint64_t) 0) << (offset_bits & (64 - 1)));
312
313 #ifdef IW_BIGENDIAN
314 while (length_bits - set_bits >= 0) {
315 uint64_t pv = *p;
316 pv = IW_ITOHLL(pv);
317 if (bit_status) {
318 if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
319 rc = IWFS_ERROR_FSM_SEGMENTATION;
320 }
321 if ((opts & FSM_BM_DRY_RUN) == 0) {
322 pv |= set_mask;
323 *p = IW_HTOILL(pv);
324 }
325 } else {
326 if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
327 rc = IWFS_ERROR_FSM_SEGMENTATION;
328 }
329 if ((opts & FSM_BM_DRY_RUN) == 0) {
330 pv &= ~set_mask;
331 *p = IW_HTOILL(pv);
332 }
333 }
334 length_bits -= set_bits;
335 set_bits = 64;
336 set_mask = ~((uint64_t) 0);
337 ++p;
338 }
339 if (length_bits) {
340 uint64_t pv = *p;
341 pv = IW_ITOHLL(pv);
342 set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
343 if (bit_status) {
344 if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
345 rc = IWFS_ERROR_FSM_SEGMENTATION;
346 }
347 if ((opts & FSM_BM_DRY_RUN) == 0) {
348 pv |= set_mask;
349 *p = IW_HTOILL(pv);
350 }
351 } else {
352 if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
353 rc = IWFS_ERROR_FSM_SEGMENTATION;
354 }
355 if ((opts & FSM_BM_DRY_RUN) == 0) {
356 pv &= ~set_mask;
357 *p = IW_HTOILL(pv);
358 }
359 }
360 }
361 #else
362 while (length_bits - set_bits >= 0) {
363 if (bit_status) {
364 if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
365 rc = IWFS_ERROR_FSM_SEGMENTATION;
366 }
367 if ((opts & FSM_BM_DRY_RUN) == 0) {
368 *p |= set_mask;
369 }
370 } else {
371 if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
372 rc = IWFS_ERROR_FSM_SEGMENTATION;
373 }
374 if ((opts & FSM_BM_DRY_RUN) == 0) {
375 *p &= ~set_mask;
376 }
377 }
378 length_bits -= set_bits;
379 set_bits = 64;
380 set_mask = ~((uint64_t) 0);
381 ++p;
382 }
383 if (length_bits) {
384 set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
385 if (bit_status) {
386 if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
387 rc = IWFS_ERROR_FSM_SEGMENTATION;
388 }
389 if ((opts & FSM_BM_DRY_RUN) == 0) {
390 *p |= set_mask;
391 }
392 } else {
393 if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
394 rc = IWFS_ERROR_FSM_SEGMENTATION;
395 }
396 if ((opts & FSM_BM_DRY_RUN) == 0) {
397 *p &= ~set_mask;
398 }
399 }
400 }
401 #endif
402 if (!rc && fsm->dlsnr) {
403 uint64_t so = offset_bits / 8;
404 uint64_t lb = length_bits_ + offset_bits % 8;
405 uint64_t dl = lb / 8;
406 if (lb % 8) {
407 ++dl;
408 }
409 rc = fsm->dlsnr->onwrite(fsm->dlsnr, fsm->bmoff + so, mm + so, dl, 0);
410 }
411 return rc;
412 }
413
414 /**
415 * @brief Allocate a continuous segment of blocks with page aligned offset.
416 *
417 * @param fsm `struct fsm`
418 * @param length_blk Desired segment length in blocks.
419 * @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
420 It also specified the desired segment offset to provide
421 * allocation locality.
422 * @param [out] olength_blk Assigned segment length in blocks.
423 * @param max_offset_blk Maximal offset of allocated block.
424 * @param opts Allocation options.
425 */
_fsm_blk_allocate_aligned_lw(struct fsm * fsm,const uint64_t length_blk,uint64_t * offset_blk,uint64_t * olength_blk,const uint64_t max_offset_blk,const iwfs_fsm_aflags opts)426 static iwrc _fsm_blk_allocate_aligned_lw(
427 struct fsm *fsm,
428 const uint64_t length_blk,
429 uint64_t *offset_blk,
430 uint64_t *olength_blk,
431 const uint64_t max_offset_blk,
432 const iwfs_fsm_aflags opts
433 ) {
434 fsm_bmopts_t bopts = FSM_BM_NONE;
435 size_t aunit_blk = (fsm->aunit >> fsm->bpow);
436 assert(fsm && length_blk > 0);
437 if (fsm->oflags & IWFSM_STRICT) {
438 bopts |= FSM_BM_STRICT;
439 }
440 *olength_blk = 0;
441 *offset_blk = 0;
442
443 /* First attempt */
444 const struct iwavl_node *nn = _fsm_find_matching_fblock_lw(fsm, 0, length_blk + aunit_blk, opts);
445 if (!nn) {
446 nn = _fsm_find_matching_fblock_lw(fsm, 0, length_blk, opts);
447 if (!nn) {
448 return IWFS_ERROR_NO_FREE_SPACE;
449 }
450 }
451
452 struct bkey *nk = &BKEY(nn);
453 uint64_t akoff = FSMBK_OFFSET(nk);
454 uint64_t aklen = FSMBK_LENGTH(nk);
455 uint64_t noff = IW_ROUNDUP(akoff, aunit_blk);
456
457 if ((noff <= max_offset_blk) && (noff < aklen + akoff) && (aklen - (noff - akoff) >= length_blk)) {
458 _fsm_del_fbk(fsm, akoff, aklen);
459 aklen = aklen - (noff - akoff);
460 if (noff > akoff) {
461 _fsm_put_fbk(fsm, akoff, noff - akoff);
462 }
463 if (aklen > length_blk) {
464 _fsm_put_fbk(fsm, noff + length_blk, aklen - length_blk);
465 }
466 *offset_blk = noff;
467 *olength_blk = length_blk;
468 return _fsm_set_bit_status_lw(fsm, noff, length_blk, 1, bopts);
469 }
470
471 aklen = 0;
472 akoff = UINT64_MAX;
473
474 // full scan
475 for (struct iwavl_node *n = iwavl_first_in_order(fsm->root); n; n = iwavl_next_in_order(n)) {
476 struct bkey *k = &BKEY(n);
477 uint64_t koff = FSMBK_OFFSET(k);
478 uint64_t klen = FSMBK_LENGTH(k);
479 if (koff < akoff) {
480 noff = IW_ROUNDUP(koff, aunit_blk);
481 if (noff <= max_offset_blk && (noff < klen + koff) && (klen - (noff - koff) >= length_blk)) {
482 akoff = koff;
483 aklen = klen;
484 }
485 }
486 }
487
488 if (akoff == UINT64_MAX) {
489 return IWFS_ERROR_NO_FREE_SPACE;
490 }
491 _fsm_del_fbk(fsm, akoff, aklen);
492 noff = IW_ROUNDUP(akoff, aunit_blk);
493 aklen = aklen - (noff - akoff);
494 if (noff > akoff) {
495 _fsm_put_fbk(fsm, akoff, noff - akoff);
496 }
497 if (aklen > length_blk) {
498 _fsm_put_fbk(fsm, noff + length_blk, aklen - length_blk);
499 }
500 *offset_blk = noff;
501 *olength_blk = length_blk;
502 return _fsm_set_bit_status_lw(fsm, noff, length_blk, 1, bopts);
503 }
504
/**
 * Free every `bkey_node` in the tree rooted at `root`.
 * Post-order traversal; the parent pointer is captured *before* the node is
 * freed so the iterator never dereferences freed memory.
 */
static void _fsm_node_destroy(struct iwavl_node *root) {
  for (struct iwavl_node *n = iwavl_first_in_postorder(root), *p;
       n && (p = iwavl_get_parent(n), 1);
       n = iwavl_next_in_postorder(n, p)) {
    struct bkey_node *bk = iwavl_entry(n, struct bkey_node, node);
    free(bk);
  }
}
513
514 /**
515 * @brief Load existing bitmap area into free-space search tree.
516 * @param fsm `struct fsm`
517 * @param bm Bitmap area start ptr
518 * @param len Bitmap area length in bytes.
519 */
_fsm_load_fsm_lw(struct fsm * fsm,const uint8_t * bm,uint64_t len)520 static void _fsm_load_fsm_lw(struct fsm *fsm, const uint8_t *bm, uint64_t len) {
521 uint64_t cbnum = 0, fbklength = 0, fbkoffset = 0;
522
523 _fsm_node_destroy(fsm->root);
524 fsm->root = 0;
525 fsm->fsmnum = 0;
526
527 for (uint64_t b = 0; b < len; ++b) {
528 register uint8_t bb = bm[b];
529 if (bb == 0) {
530 fbklength += 8;
531 cbnum += 8;
532 } else if (bb == 0xffU) {
533 if (fbklength) {
534 fbkoffset = cbnum - fbklength;
535 _fsm_put_fbk(fsm, fbkoffset, fbklength);
536 fbklength = 0;
537 }
538 cbnum += 8;
539 } else {
540 for (int i = 0; i < 8; ++i, ++cbnum) {
541 if (bb & (1U << i)) {
542 if (fbklength) {
543 fbkoffset = cbnum - fbklength;
544 _fsm_put_fbk(fsm, fbkoffset, fbklength);
545 fbklength = 0;
546 }
547 } else {
548 ++fbklength;
549 }
550 }
551 }
552 }
553 if (fbklength > 0) {
554 fbkoffset = len * 8 - fbklength;
555 _fsm_put_fbk(fsm, fbkoffset, fbklength);
556 }
557 }
558
559 /**
560 * @brief Flush a current `iwfsmfile` metadata into the file header.
561 * @param fsm
562 * @param is_sync If `1` perform mmap sync.
563 * @return
564 */
_fsm_write_meta_lw(struct fsm * fsm)565 static iwrc _fsm_write_meta_lw(struct fsm *fsm) {
566 uint64_t llv;
567 size_t wlen;
568 uint32_t sp = 0, lv;
569 uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = { 0 };
570
571 /*
572 [FSM_CTL_MAGICK u32][block pow u8]
573 [bmoffset u64][bmlength u64]
574 [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
575 [custom header size u32][custom header data...]
576 [fsm data...]
577 */
578
579 /* magic */
580 lv = IW_HTOIL(IWFSM_MAGICK);
581 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
582 memcpy(hdr + sp, &lv, sizeof(lv));
583 sp += sizeof(lv);
584
585 /* block pow */
586 static_assert(sizeof(fsm->bpow) == 1, "sizeof(fms->bpow) == 1");
587 assert(sp + sizeof(fsm->bpow) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
588 memcpy(hdr + sp, &fsm->bpow, sizeof(fsm->bpow));
589 sp += sizeof(fsm->bpow);
590
591 /* fsm bitmap block offset */
592 llv = fsm->bmoff;
593 llv = IW_HTOILL(llv);
594 assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
595 memcpy(hdr + sp, &llv, sizeof(llv));
596 sp += sizeof(llv);
597
598 /* fsm bitmap block length */
599 llv = fsm->bmlen;
600 llv = IW_HTOILL(llv);
601 assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
602 memcpy(hdr + sp, &llv, sizeof(llv));
603 sp += sizeof(llv);
604
605 /* Cumulative sum of record sizes acquired by `allocate` */
606 llv = fsm->crzsum;
607 llv = IW_HTOILL(llv);
608 assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
609 memcpy(hdr + sp, &llv, sizeof(llv));
610 sp += sizeof(llv);
611
612 /* Cumulative number of records acquired by `allocated` */
613 lv = fsm->crznum;
614 lv = IW_HTOIL(lv);
615 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
616 memcpy(hdr + sp, &lv, sizeof(lv));
617 sp += sizeof(lv);
618
619 /* Record sizes standard variance (deviation^2 * N) */
620 llv = fsm->crzvar;
621 llv = IW_HTOILL(llv);
622 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
623 memcpy(hdr + sp, &llv, sizeof(llv));
624 sp += sizeof(llv);
625
626 /* Reserved */
627 sp += 32;
628
629 /* Size of header */
630 lv = fsm->hdrlen;
631 lv = IW_HTOIL(lv);
632 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
633 memcpy(hdr + sp, &lv, sizeof(lv));
634 sp += sizeof(lv);
635
636 assert(sp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
637 return fsm->pool.write(&fsm->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &wlen);
638 }
639
640 /**
641 * @brief Search for the first next set bit position
642 * starting from the specified offset bit (INCLUDED).
643 */
_fsm_find_next_set_bit(const uint64_t * addr,register uint64_t offset_bit,const uint64_t max_offset_bit,int * found)644 static uint64_t _fsm_find_next_set_bit(
645 const uint64_t *addr,
646 register uint64_t offset_bit,
647 const uint64_t max_offset_bit,
648 int *found
649 ) {
650 *found = 0;
651 register uint64_t bit, size;
652 register const uint64_t *p = addr + offset_bit / 64;
653 if (offset_bit >= max_offset_bit) {
654 return 0;
655 }
656 bit = offset_bit & (64 - 1);
657 offset_bit -= bit;
658 size = max_offset_bit - offset_bit;
659
660 #ifdef IW_BIGENDIAN
661 uint64_t pv = *p;
662 if (bit) {
663 pv = IW_ITOHLL(pv) & (~((uint64_t) 0) << bit);
664 if (pv) {
665 pv = iwbits_find_first_sbit64(pv);
666 if (pv >= size) {
667 return 0;
668 } else {
669 *found = 1;
670 return offset_bit + pv;
671 }
672 }
673 if (size <= 64) {
674 return 0;
675 }
676 offset_bit += 64;
677 size -= 64;
678 ++p;
679 }
680 while (size & ~(64 - 1)) {
681 pv = *(p++);
682 if (pv) {
683 *found = 1;
684 return offset_bit + iwbits_find_first_sbit64(IW_ITOHLL(pv));
685 }
686 offset_bit += 64;
687 size -= 64;
688 }
689 if (!size) {
690 return 0;
691 }
692 pv = *p;
693 pv = IW_ITOHLL(pv) & (~((uint64_t) 0) >> (64 - size));
694 if (pv) {
695 *found = 1;
696 return offset_bit + iwbits_find_first_sbit64(pv);
697 } else {
698 return 0;
699 }
700 #else
701 register uint64_t tmp;
702 if (bit) {
703 tmp = *p & (~((uint64_t) 0) << bit);
704 if (tmp) {
705 tmp = iwbits_find_first_sbit64(tmp);
706 if (tmp >= size) {
707 return 0;
708 } else {
709 *found = 1;
710 return offset_bit + tmp;
711 }
712 }
713 if (size <= 64) {
714 return 0;
715 }
716 offset_bit += 64;
717 size -= 64;
718 ++p;
719 }
720 while (size & ~(64 - 1)) {
721 if ((tmp = *(p++))) {
722 *found = 1;
723 return offset_bit + iwbits_find_first_sbit64(tmp);
724 }
725 offset_bit += 64;
726 size -= 64;
727 }
728 if (!size) {
729 return 0;
730 }
731 tmp = (*p) & (~((uint64_t) 0) >> (64 - size));
732 if (tmp) {
733 *found = 1;
734 return offset_bit + iwbits_find_first_sbit64(tmp);
735 } else {
736 return 0;
737 }
738 #endif
739 }
740
741 /**
742 * @brief Search for the first previous set bit position
743 * starting from the specified offset_bit (EXCLUDED).
744 */
_fsm_find_prev_set_bit(const uint64_t * addr,register uint64_t offset_bit,const uint64_t min_offset_bit,int * found)745 static uint64_t _fsm_find_prev_set_bit(
746 const uint64_t *addr,
747 register uint64_t offset_bit,
748 const uint64_t min_offset_bit,
749 int *found
750 ) {
751 register const uint64_t *p;
752 register uint64_t tmp, bit, size;
753 *found = 0;
754 if (min_offset_bit >= offset_bit) {
755 return 0;
756 }
757 size = offset_bit - min_offset_bit;
758 bit = offset_bit & (64 - 1);
759 p = addr + offset_bit / 64;
760
761 #ifdef IW_BIGENDIAN
762 uint64_t pv;
763 if (bit) {
764 pv = *p;
765 pv = (iwbits_reverse_64(IW_ITOHLL(pv)) >> (64 - bit));
766 if (pv) {
767 pv = iwbits_find_first_sbit64(pv);
768 if (pv >= size) {
769 return 0;
770 } else {
771 *found = 1;
772 assert(offset_bit > pv);
773 return offset_bit > pv ? offset_bit - pv - 1 : 0;
774 }
775 }
776 offset_bit -= bit;
777 size -= bit;
778 }
779 while (size & ~(64 - 1)) {
780 if (*(--p)) {
781 pv = *p;
782 *found = 1;
783 tmp = iwbits_find_first_sbit64(iwbits_reverse_64(IW_ITOHLL(pv)));
784 assert(offset_bit > tmp);
785 return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
786 }
787 offset_bit -= 64;
788 size -= 64;
789 }
790 if (size == 0) {
791 return 0;
792 }
793 pv = *(--p);
794 tmp = iwbits_reverse_64(IW_ITOHLL(pv)) & ((((uint64_t) 1) << size) - 1);
795 #else
796 if (bit) {
797 tmp = (iwbits_reverse_64(*p) >> (64 - bit));
798 if (tmp) {
799 tmp = iwbits_find_first_sbit64(tmp);
800 if (tmp >= size) {
801 return 0;
802 } else {
803 *found = 1;
804 assert(offset_bit > tmp);
805 return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
806 }
807 }
808 offset_bit -= bit;
809 size -= bit;
810 }
811 while (size & ~(64 - 1)) {
812 if (*(--p)) {
813 *found = 1;
814 tmp = iwbits_find_first_sbit64(iwbits_reverse_64(*p));
815 assert(offset_bit > tmp);
816 return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
817 }
818 offset_bit -= 64;
819 size -= 64;
820 }
821 if (size == 0) {
822 return 0;
823 }
824 tmp = iwbits_reverse_64(*(--p)) & ((((uint64_t) 1) << size) - 1);
825 #endif
826 if (tmp) {
827 uint64_t tmp2;
828 *found = 1;
829 tmp2 = iwbits_find_first_sbit64(tmp);
830 assert(offset_bit > tmp2);
831 return offset_bit > tmp2 ? offset_bit - tmp2 - 1 : 0;
832 } else {
833 return 0;
834 }
835 }
836
837 /**
838 * @brief Return a previously allocated blocks
839 * back into the free-blocks pool.
840 *
841 * @param fms `struct fsm`
842 * @param offset_blk Starting block number of the specified range.
843 * @param length_blk Range size in blocks.
844 */
_fsm_blk_deallocate_lw(struct fsm * fsm,const uint64_t offset_blk,const uint64_t length_blk)845 static iwrc _fsm_blk_deallocate_lw(
846 struct fsm *fsm,
847 const uint64_t offset_blk,
848 const uint64_t length_blk
849 ) {
850 iwrc rc;
851 uint64_t *bmptr;
852 uint64_t left, right;
853 int hasleft = 0, hasright = 0;
854 uint64_t key_offset = offset_blk, key_length = length_blk;
855 uint64_t rm_offset = 0, rm_length = 0;
856 uint64_t lfbkoff = fsm->lfbkoff;
857 uint64_t end_offset_blk = offset_blk + length_blk;
858 fsm_bmopts_t bopts = FSM_BM_NONE;
859
860
861 if (fsm->oflags & IWFSM_STRICT) {
862 bopts |= FSM_BM_STRICT;
863 }
864 rc = _fsm_set_bit_status_lw(fsm, offset_blk, length_blk, 0, bopts);
865 RCRET(rc);
866
867 rc = _fsm_bmptr(fsm, &bmptr);
868 RCRET(rc);
869
870 /* Merge with neighborhoods */
871 left = _fsm_find_prev_set_bit(bmptr, offset_blk, 0, &hasleft);
872 if (lfbkoff && (lfbkoff == end_offset_blk)) {
873 right = lfbkoff + fsm->lfbklen;
874 hasright = 1;
875 } else {
876 uint64_t maxoff = lfbkoff ? lfbkoff : (fsm->bmlen << 3);
877 right = _fsm_find_next_set_bit(bmptr, end_offset_blk, maxoff, &hasright);
878 }
879
880 if (hasleft) {
881 if (offset_blk > left + 1) {
882 left += 1;
883 rm_offset = left;
884 rm_length = offset_blk - left;
885 _fsm_del_fbk(fsm, rm_offset, rm_length);
886 key_offset = rm_offset;
887 key_length += rm_length;
888 }
889 } else if (offset_blk > 0) { /* zero start */
890 rm_offset = 0;
891 rm_length = offset_blk;
892 _fsm_del_fbk(fsm, rm_offset, rm_length);
893 key_offset = rm_offset;
894 key_length += rm_length;
895 }
896 if (hasright && (right > end_offset_blk)) {
897 rm_offset = end_offset_blk;
898 rm_length = right - end_offset_blk;
899 _fsm_del_fbk(fsm, rm_offset, rm_length);
900 key_length += rm_length;
901 }
902 IWRC(_fsm_put_fbk(fsm, key_offset, key_length), rc);
903 return rc;
904 }
905
906 /**
907 * @brief Initialize a new free-space bitmap area.
908 *
909 * If bitmap exists, its content will be moved into newly created area.
910 * Blocks from the previous bitmap are will disposed and deallocated.
911 *
912 * @param fsm `struct fsm
913 * @param bmoff Byte offset of the new bitmap. Value must be page aligned.
914 * @param bmlen Byte length of the new bitmap. Value must be page aligned.
915 Its length must not be lesser than length of old bitmap.
916 */
_fsm_init_lw(struct fsm * fsm,uint64_t bmoff,uint64_t bmlen)917 static iwrc _fsm_init_lw(struct fsm *fsm, uint64_t bmoff, uint64_t bmlen) {
918 iwrc rc;
919 uint8_t *mm, *mm2;
920 size_t sp, sp2;
921 uint64_t old_bmoff, old_bmlen;
922 IWFS_EXT *pool = &fsm->pool;
923
924 if ((bmlen & ((1U << fsm->bpow) - 1)) || (bmoff & ((1U << fsm->bpow) - 1)) || (bmoff & (fsm->aunit - 1))) {
925 return IWFS_ERROR_RANGE_NOT_ALIGNED;
926 }
927 if (bmlen < fsm->bmlen) {
928 rc = IW_ERROR_INVALID_ARGS;
929 iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
930 " must not be lesser than the current bitmap area length %" PRIu64 "",
931 bmlen, fsm->bmlen);
932 return rc;
933 }
934 if (bmlen * 8 < ((bmoff + bmlen) >> fsm->bpow) + 1) {
935 rc = IW_ERROR_INVALID_ARGS;
936 iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
937 " is not enough to handle bitmap itself and the file header area.",
938 bmlen);
939 return rc;
940 }
941 rc = _fsm_ensure_size_lw(fsm, bmoff + bmlen);
942 RCRET(rc);
943 if (fsm->mmap_all) {
944 // get mmap area without locking, since we ensured what pool file will not be remapped
945 rc = pool->probe_mmap(pool, 0, &mm, &sp);
946 RCRET(rc);
947 if (sp < bmoff + bmlen) {
948 return IWFS_ERROR_NOT_MMAPED;
949 } else {
950 mm += bmoff;
951 }
952 } else {
953 // get mmap area without locking, since we ensured what pool file will not be remapped
954 rc = pool->probe_mmap(pool, bmoff, &mm, &sp);
955 RCRET(rc);
956 if (sp < bmlen) {
957 return IWFS_ERROR_NOT_MMAPED;
958 }
959 }
960 if (fsm->bmlen) {
961 /* We have an old active bitmap. Lets copy its content to the new location.*/
962 if (IW_RANGES_OVERLAP(fsm->bmoff, fsm->bmoff + fsm->bmlen, bmoff, bmoff + bmlen)) {
963 iwlog_ecode_error2(rc, "New and old bitmap areas are overlaped");
964 return IW_ERROR_INVALID_ARGS;
965 }
966 if (fsm->mmap_all) {
967 mm2 = mm - bmoff + fsm->bmoff;
968 } else {
969 rc = pool->probe_mmap(pool, fsm->bmoff, &mm2, &sp2);
970 if (!rc && (sp2 < fsm->bmlen)) {
971 rc = IWFS_ERROR_NOT_MMAPED;
972 }
973 if (rc) {
974 iwlog_ecode_error2(rc, "Old bitmap area is not mmaped");
975 return rc;
976 }
977 }
978 assert(!((fsm->bmlen - bmlen) & ((1U << fsm->bpow) - 1)));
979 if (fsm->dlsnr) {
980 rc = fsm->dlsnr->onwrite(fsm->dlsnr, bmoff, mm2, fsm->bmlen, 0);
981 RCRET(rc);
982 }
983 memcpy(mm, mm2, fsm->bmlen);
984 if (bmlen > fsm->bmlen) {
985 memset(mm + fsm->bmlen, 0, bmlen - fsm->bmlen);
986 if (fsm->dlsnr) {
987 rc = fsm->dlsnr->onset(fsm->dlsnr, bmoff + fsm->bmlen, 0, bmlen - fsm->bmlen, 0);
988 RCRET(rc);
989 }
990 }
991 } else {
992 mm2 = 0;
993 memset(mm, 0, bmlen);
994 if (fsm->dlsnr) {
995 rc = fsm->dlsnr->onset(fsm->dlsnr, bmoff, 0, bmlen, 0);
996 RCRET(rc);
997 }
998 }
999
1000 /* Backup the previous bitmap range */
1001 old_bmlen = fsm->bmlen;
1002 old_bmoff = fsm->bmoff;
1003 fsm->bmoff = bmoff;
1004 fsm->bmlen = bmlen;
1005
1006 RCC(rc, rollback, _fsm_set_bit_status_lw(fsm, (bmoff >> fsm->bpow), (bmlen >> fsm->bpow), 1, FSM_BM_NONE));
1007 if (!old_bmlen) { /* First time initialization */
1008 /* Header allocation */
1009 RCC(rc, rollback, _fsm_set_bit_status_lw(fsm, 0, (fsm->hdrlen >> fsm->bpow), 1, FSM_BM_NONE));
1010 }
1011
1012 /* Reload fsm tree */
1013 _fsm_load_fsm_lw(fsm, mm, bmlen);
1014
1015 /* Flush new meta */
1016 RCC(rc, rollback, _fsm_write_meta_lw(fsm));
1017 RCC(rc, rollback, pool->sync(pool, IWFS_FDATASYNC));
1018
1019 if (old_bmlen) {
1020 /* Now we are save to deallocate the old bitmap */
1021 rc = _fsm_blk_deallocate_lw(fsm, (old_bmoff >> fsm->bpow), (old_bmlen >> fsm->bpow));
1022 if (!fsm->mmap_all) {
1023 pool->remove_mmap(pool, old_bmoff);
1024 }
1025 }
1026 return rc;
1027
1028 rollback:
1029 /* try to rollback previous bitmap state */
1030 fsm->bmoff = old_bmoff;
1031 fsm->bmlen = old_bmlen;
1032 if (old_bmlen && mm2) {
1033 _fsm_load_fsm_lw(fsm, mm2, old_bmlen);
1034 }
1035 pool->sync(pool, IWFS_FDATASYNC);
1036 return rc;
1037 }
1038
1039 /**
1040 * @brief Resize bitmap area.
1041 * @param fsm `structfsm
1042 * @param size New size of bitmap area in bytes.
1043 */
_fsm_resize_fsm_bitmap_lw(struct fsm * fsm,uint64_t size)1044 static iwrc _fsm_resize_fsm_bitmap_lw(struct fsm *fsm, uint64_t size) {
1045 iwrc rc;
1046 uint64_t bmoffset = 0, bmlen, sp;
1047 IWFS_EXT *pool = &fsm->pool;
1048
1049 if (fsm->bmlen >= size) {
1050 return 0;
1051 }
1052 bmlen = IW_ROUNDUP(size, fsm->aunit); /* align to the system page size. */
1053 rc = _fsm_blk_allocate_aligned_lw(
1054 fsm, (bmlen >> fsm->bpow), &bmoffset, &sp, UINT64_MAX,
1055 IWFSM_ALLOC_NO_STATS | IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE);
1056 if (!rc) {
1057 bmoffset = bmoffset << fsm->bpow;
1058 bmlen = sp << fsm->bpow;
1059 } else if (rc == IWFS_ERROR_NO_FREE_SPACE) {
1060 bmoffset = fsm->bmlen * (1 << fsm->bpow) * 8;
1061 bmoffset = IW_ROUNDUP(bmoffset, fsm->aunit);
1062 }
1063 if (!fsm->mmap_all) {
1064 rc = pool->add_mmap(pool, bmoffset, bmlen, fsm->mmap_opts);
1065 RCRET(rc);
1066 }
1067 rc = _fsm_init_lw(fsm, bmoffset, bmlen);
1068 if (rc && !fsm->mmap_all) {
1069 pool->remove_mmap(pool, bmoffset);
1070 }
1071 return rc;
1072 }
1073
/**
 * @brief Allocate a continuous segment of blocks.
 *
 * @param fsm `struct fsm`
 * @param length_blk Desired segment length in blocks.
 * @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
 *                 It also specifies the desired segment offset to provide allocation locality.
 * @param [out] olength_blk Assigned segment length in blocks.
 * @param opts Allocation flags.
 */
static iwrc _fsm_blk_allocate_lw(
  struct fsm *fsm,
  uint64_t length_blk,
  uint64_t *offset_blk,
  uint64_t *olength_blk,
  iwfs_fsm_aflags opts
  ) {
  iwrc rc;
  struct iwavl_node *nn;
  fsm_bmopts_t bopts = FSM_BM_NONE;

  if (opts & IWFSM_ALLOC_PAGE_ALIGNED) {
    /* Page-aligned allocation: delegate, doubling the bitmap on exhaustion. */
    while (1) {
      rc = _fsm_blk_allocate_aligned_lw(fsm, length_blk, offset_blk, olength_blk, UINT64_MAX, opts);
      if (rc == IWFS_ERROR_NO_FREE_SPACE) {
        if (opts & IWFSM_ALLOC_NO_EXTEND) {
          return IWFS_ERROR_NO_FREE_SPACE;
        }
        rc = _fsm_resize_fsm_bitmap_lw(fsm, fsm->bmlen << 1);
        RCRET(rc);
        continue;
      }
      if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
        /* Ensure the backing file physically covers the allocated range. */
        uint64_t bs = *offset_blk;
        int64_t bl = *olength_blk;
        rc = _fsm_ensure_size_lw(fsm, (bs << fsm->bpow) + (bl << fsm->bpow));
      }
      return rc;
    }
  }

  *olength_blk = length_blk;

start:
  nn = (struct iwavl_node*) _fsm_find_matching_fblock_lw(fsm, *offset_blk, length_blk, opts);
  if (nn) { /* use existing free space block */
    const struct bkey *nk = &BKEY(nn);
    uint64_t nlength = FSMBK_LENGTH(nk);
    *offset_blk = FSMBK_OFFSET(nk);

    /* Remove the chosen free block from the free-space tree. */
    _fsm_del_fbk2(fsm, nn);

    if (nlength > length_blk) { /* re-save rest of free-space */
      if (!(opts & IWFSM_ALLOC_NO_OVERALLOCATE) && fsm->crznum) {
        /* Over-allocation heuristic based on running allocation stats:
           if the leftover is much smaller than a typical allocation it
           would likely be wasted, so attach it to this record instead. */
        /* todo use lognormal distribution? */
        double_t d = ((double_t) fsm->crzsum / (double_t) fsm->crznum) /*avg*/
                     - (double) (nlength - length_blk);                /*rest blk size*/
        double_t s = ((double_t) fsm->crzvar / (double_t) fsm->crznum) * 6.0; /* blk size dispersion * 6 */
        if ((s > 1) && (d > 0) && (d * d > s)) {
          /* its better to attach rest of block to
             the record */
          *olength_blk = nlength;
        } else {
          _fsm_put_fbk(fsm, (*offset_blk + length_blk), (nlength - length_blk));
        }
      } else {
        _fsm_put_fbk(fsm, (*offset_blk + length_blk), (nlength - length_blk));
      }
    }
  } else {
    /* No matching free block found. */
    if (opts & IWFSM_ALLOC_NO_EXTEND) {
      return IWFS_ERROR_NO_FREE_SPACE;
    }
    /* Double the bitmap (thus the addressable space) and retry. */
    rc = _fsm_resize_fsm_bitmap_lw(fsm, fsm->bmlen << 1);
    RCRET(rc);
    goto start;
  }

  if (fsm->oflags & IWFSM_STRICT) {
    bopts |= FSM_BM_STRICT;
  }
  /* Mark the chosen range as allocated in the bitmap. */
  rc = _fsm_set_bit_status_lw(fsm, *offset_blk, *olength_blk, 1, bopts);
  if (!rc && !(opts & IWFSM_ALLOC_NO_STATS)) {
    double_t avg;
    /* Update allocation statistics */
    if (fsm->crznum > FSM_MAX_STATS_COUNT) {
      /* Reset stats periodically so they track recent behavior. */
      fsm->crznum = 0;
      fsm->crzsum = 0;
      fsm->crzvar = 0;
    }
    ++fsm->crznum;
    fsm->crzsum += length_blk;
    avg = (double_t) fsm->crzsum / (double_t) fsm->crznum; /* average */
    fsm->crzvar
      += (uint64_t) (((double_t) length_blk - avg) * ((double_t) length_blk - avg) + 0.5L); /* variance */
  }
  if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
    /* Ensure the backing file physically covers the allocated range. */
    uint64_t bs = *offset_blk;
    int64_t bl = *olength_blk;
    rc = _fsm_ensure_size_lw(fsm, (bs << fsm->bpow) + (bl << fsm->bpow));
  }
  if (!rc && (opts & IWFSM_SYNC_BMAP)) {
    /* Flush the bitmap mmap region if requested. */
    uint64_t *bmptr;
    if (!_fsm_bmptr(fsm, &bmptr)) {
      IWFS_EXT *pool = &fsm->pool;
      rc = pool->sync_mmap(pool, fsm->bmoff, IWFS_SYNCDEFAULT);
    }
  }
  return rc;
}
1184
/**
 * @brief Remove all free blocks from the end of the file and trim its size.
 *
 * Also tries to relocate the free-space bitmap to a lower file offset
 * so trailing space can be released.
 */
static iwrc _fsm_trim_tail_lw(struct fsm *fsm) {
  iwrc rc;
  int hasleft;
  uint64_t length, lastblk, *bmptr;
  IWFS_EXT_STATE fstate;
  uint64_t offset = 0;

  if (!(fsm->omode & IWFS_OWRITE)) {
    /* Read-only file: nothing to trim. */
    return 0;
  }
  /* find free space for fsm with lesser offset than actual */
  rc = _fsm_blk_allocate_aligned_lw(
    fsm, (fsm->bmlen >> fsm->bpow), &offset, &length, (fsm->bmoff >> fsm->bpow),
    IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE | IWFSM_ALLOC_NO_STATS);

  if (rc && (rc != IWFS_ERROR_NO_FREE_SPACE)) {
    return rc;
  }
  if (rc) {
    /* No lower free area: keep the bitmap where it is. */
    rc = 0;
  } else if ((offset << fsm->bpow) < fsm->bmoff) {
    /* Relocate the bitmap into the lower free area. */
    offset = offset << fsm->bpow;
    length = length << fsm->bpow;
    assert(offset != fsm->bmoff);
    fsm->pool.add_mmap(&fsm->pool, offset, length, fsm->mmap_opts);
    RCC(rc, finish, _fsm_init_lw(fsm, offset, length));
  } else {
    /* should never be reached: the allocator was bounded by the bitmap offset */
    assert(0);
    RCC(rc, finish, _fsm_blk_deallocate_lw(fsm, offset, length));
  }

  RCC(rc, finish, _fsm_bmptr(fsm, &bmptr)); // -V519

  /* Find the last used block and truncate the file just past it. */
  lastblk = (fsm->bmoff + fsm->bmlen) >> fsm->bpow;
  offset = _fsm_find_prev_set_bit(bmptr, (fsm->bmlen << 3), lastblk, &hasleft);
  if (hasleft) {
    lastblk = offset + 1;
  }
  rc = fsm->pool.state(&fsm->pool, &fstate);
  if (!rc && (fstate.fsize > (lastblk << fsm->bpow))) {
    rc = fsm->pool.truncate(&fsm->pool, lastblk << fsm->bpow);
  }

finish:
  return rc;
}
1235
_fsm_init_impl(struct fsm * fsm,const IWFS_FSM_OPTS * opts)1236 static iwrc _fsm_init_impl(struct fsm *fsm, const IWFS_FSM_OPTS *opts) {
1237 fsm->oflags = opts->oflags;
1238 fsm->aunit = iwp_alloc_unit();
1239 fsm->bpow = opts->bpow;
1240 fsm->mmap_all = opts->mmap_all;
1241 if (!fsm->bpow) {
1242 fsm->bpow = 6; // 64bit block
1243 } else if (fsm->bpow > FSM_MAX_BLOCK_POW) {
1244 return IWFS_ERROR_INVALID_BLOCK_SIZE;
1245 } else if ((1U << fsm->bpow) > fsm->aunit) {
1246 return IWFS_ERROR_PLATFORM_PAGE;
1247 }
1248 return 0;
1249 }
1250
_fsm_init_locks(struct fsm * fsm,const IWFS_FSM_OPTS * opts)1251 static iwrc _fsm_init_locks(struct fsm *fsm, const IWFS_FSM_OPTS *opts) {
1252 if (opts->oflags & IWFSM_NOLOCKS) {
1253 fsm->ctlrwlk = 0;
1254 return 0;
1255 }
1256 fsm->ctlrwlk = calloc(1, sizeof(*fsm->ctlrwlk));
1257 if (!fsm->ctlrwlk) {
1258 return iwrc_set_errno(IW_ERROR_ALLOC, errno);
1259 }
1260 int rci = pthread_rwlock_init(fsm->ctlrwlk, 0);
1261 if (rci) {
1262 free(fsm->ctlrwlk);
1263 fsm->ctlrwlk = 0;
1264 return iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci);
1265 }
1266 return 0;
1267 }
1268
_fsm_destroy_locks(struct fsm * fsm)1269 static iwrc _fsm_destroy_locks(struct fsm *fsm) {
1270 if (!fsm->ctlrwlk) {
1271 return 0;
1272 }
1273 iwrc rc = 0;
1274 int rci = pthread_rwlock_destroy(fsm->ctlrwlk);
1275 if (rci) {
1276 IWRC(iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci), rc);
1277 }
1278 free(fsm->ctlrwlk);
1279 fsm->ctlrwlk = 0;
1280 return rc;
1281 }
1282
/* Reads and validates the file meta header, filling `fsm` fields
 * (bpow, bmoff, bmlen, allocation stats, hdrlen). */
static iwrc _fsm_read_meta_lr(struct fsm *fsm) {
  iwrc rc;
  uint32_t lv;
  uint64_t llv;
  size_t sp, rp = 0;
  uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = { 0 };

  /*
     On-disk header layout:
     [FSM_CTL_MAGICK u32][block pow u8]
     [bmoffset u64][bmlength u64]
     [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
     [custom header size u32][custom header data...]
     [fsm data...]
   */

  rc = fsm->pool.read(&fsm->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &sp);
  if (rc) {
    iwlog_ecode_error3(rc);
    return rc;
  }

  /* Magic */
  memcpy(&lv, hdr + rp, sizeof(lv)); // -V512
  lv = IW_ITOHL(lv);
  if (lv != IWFSM_MAGICK) {
    rc = IWFS_ERROR_INVALID_FILEMETA;
    iwlog_ecode_error2(rc, "Invalid file magic number");
    return rc;
  }
  rp += sizeof(lv);

  /* Block pow */
  memcpy(&fsm->bpow, hdr + rp, sizeof(fsm->bpow));
  rp += sizeof(fsm->bpow);

  if (fsm->bpow > FSM_MAX_BLOCK_POW) {
    rc = IWFS_ERROR_INVALID_FILEMETA;
    iwlog_ecode_error(rc, "Invalid file blocks pow: %u", fsm->bpow);
    return rc;
  }
  if ((1U << fsm->bpow) > fsm->aunit) {
    /* NOTE(review): rc is set here but parsing continues and the error
       is returned at the end of the function — confirm this is intended. */
    rc = IWFS_ERROR_PLATFORM_PAGE;
    iwlog_ecode_error(rc, "Block size: %u must not be greater than system page size: %zu",
                      (1U << fsm->bpow), fsm->aunit);
  }

  /* Free-space bitmap offset */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->bmoff = llv;
  rp += sizeof(llv);

  /* Free-space bitmap length */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->bmlen = llv;
  if (llv & (64 - 1)) {
    /* Bitmap is processed in 64-bit words, so its length must be 64-bit aligned. */
    rc = IWFS_ERROR_INVALID_FILEMETA;
    iwlog_ecode_error(rc, "Free-space bitmap length is not 64bit aligned: %" PRIuMAX "", fsm->bmlen);
  }
  rp += sizeof(llv);

  /* Cumulative sum of record sizes acquired by `allocate` */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->crzsum = llv;
  rp += sizeof(llv);

  /* Cumulative number of records acquired by `allocated` */
  memcpy(&lv, hdr + rp, sizeof(lv));
  lv = IW_ITOHL(lv);
  fsm->crznum = lv;
  rp += sizeof(lv);

  /* Record sizes standard variance (deviation^2 * N) */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->crzvar = llv;
  rp += sizeof(llv);

  /* Reserved */
  rp += 32;

  /* Header size */
  memcpy(&lv, hdr + rp, sizeof(lv));
  lv = IW_ITOHL(lv);
  fsm->hdrlen = lv;
  rp += sizeof(lv);

  assert(rp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
  return rc;
}
1375
_fsm_init_new_lw(struct fsm * fsm,const IWFS_FSM_OPTS * opts)1376 static iwrc _fsm_init_new_lw(struct fsm *fsm, const IWFS_FSM_OPTS *opts) {
1377 FSM_ENSURE_OPEN(fsm);
1378 iwrc rc;
1379 uint64_t bmlen, bmoff;
1380 IWFS_EXT *pool = &fsm->pool;
1381 assert(fsm->aunit && fsm->bpow);
1382
1383 fsm->hdrlen = opts->hdrlen + IWFSM_CUSTOM_HDR_DATA_OFFSET;
1384 fsm->hdrlen = IW_ROUNDUP(fsm->hdrlen, 1ULL << fsm->bpow);
1385 bmlen = opts->bmlen > 0 ? IW_ROUNDUP(opts->bmlen, fsm->aunit) : fsm->aunit;
1386 bmoff = IW_ROUNDUP(fsm->hdrlen, fsm->aunit);
1387
1388 if (fsm->mmap_all) {
1389 /* mmap whole file */
1390 rc = pool->add_mmap(pool, 0, SIZE_T_MAX, fsm->mmap_opts);
1391 RCRET(rc);
1392 } else {
1393 /* mmap header */
1394 rc = pool->add_mmap(pool, 0, fsm->hdrlen, fsm->mmap_opts);
1395 RCRET(rc);
1396 /* mmap the fsm bitmap index */
1397 rc = pool->add_mmap(pool, bmoff, bmlen, fsm->mmap_opts);
1398 RCRET(rc);
1399 }
1400 return _fsm_init_lw(fsm, bmoff, bmlen);
1401 }
1402
/* Opens an existing file: reads the meta header, maps the needed regions
 * and loads the in-memory free-space tree from the on-disk bitmap. */
static iwrc _fsm_init_existing_lw(struct fsm *fsm) {
  FSM_ENSURE_OPEN(fsm);
  iwrc rc;
  size_t sp;
  uint8_t *mm;
  IWFS_EXT *pool = &fsm->pool;

  RCC(rc, finish, _fsm_read_meta_lr(fsm));

  if (fsm->mmap_all) {
    /* mmap the whole file */
    RCC(rc, finish, pool->add_mmap(pool, 0, SIZE_T_MAX, fsm->mmap_opts));
    RCC(rc, finish, pool->probe_mmap(pool, 0, &mm, &sp));
    if (sp < fsm->bmoff + fsm->bmlen) {
      /* The mapping must cover the whole bitmap area. */
      rc = IWFS_ERROR_NOT_MMAPED;
      goto finish;
    } else {
      mm += fsm->bmoff; /* position at the bitmap area */
    }
  } else {
    /* mmap the header of file */
    RCC(rc, finish, pool->add_mmap(pool, 0, fsm->hdrlen, fsm->mmap_opts));
    /* mmap the fsm bitmap index */
    RCC(rc, finish, pool->add_mmap(pool, fsm->bmoff, fsm->bmlen, fsm->mmap_opts));
    RCC(rc, finish, pool->probe_mmap(pool, fsm->bmoff, &mm, &sp));
    if (sp < fsm->bmlen) {
      rc = IWFS_ERROR_NOT_MMAPED;
      goto finish;
    }
  }

  /* Build the in-memory free-space tree from the bitmap. */
  _fsm_load_fsm_lw(fsm, mm, fsm->bmlen);

finish:
  return rc;
}
1439
1440 /**
1441 * @brief Check if all blocks within the specified range have been `allocated`.
1442 *
1443 * @param fsm `struct fsm`
1444 * @param offset_blk Starting block number of the specified range.
1445 * @param length_blk Range size in blocks.
1446 * @param [out] ret Checking result.
1447 */
_fsm_is_fully_allocated_lr(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk,int * ret)1448 static iwrc _fsm_is_fully_allocated_lr(struct fsm *fsm, uint64_t offset_blk, uint64_t length_blk, int *ret) {
1449 uint64_t end = offset_blk + length_blk;
1450 *ret = 1;
1451 if ((length_blk < 1) || (end < offset_blk) || (end > (fsm->bmlen << 3))) {
1452 *ret = 0;
1453 return 0;
1454 }
1455 iwrc rc = _fsm_set_bit_status_lw(fsm, offset_blk, length_blk, 0, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1456 if (rc == IWFS_ERROR_FSM_SEGMENTATION) {
1457 *ret = 0;
1458 return 0;
1459 }
1460 return rc;
1461 }
1462
1463 /*************************************************************************************************
1464 * Public API *
1465 *************************************************************************************************/
1466
/* Writes `siz` bytes at `off`. In IWFSM_STRICT mode the whole target range
 * must be allocated, otherwise IWFS_ERROR_FSM_SEGMENTATION is returned. */
static iwrc _fsm_write(struct IWFS_FSM *f, off_t off, const void *buf, size_t siz, size_t *sp) {
  FSM_ENSURE_OPEN2(f);
  struct fsm *fsm = f->impl;
  iwrc rc = _fsm_ctrl_rlock(fsm);
  RCRET(rc);
  if (fsm->oflags & IWFSM_STRICT) {
    int allocated = 0;
    /* Verify every block covered by [off, off + siz) is allocated. */
    IWRC(_fsm_is_fully_allocated_lr(fsm,
                                    (uint64_t) off >> fsm->bpow,
                                    IW_ROUNDUP(siz, 1ULL << fsm->bpow) >> fsm->bpow,
                                    &allocated), rc);
    if (!rc) {
      if (!allocated) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      } else {
        rc = fsm->pool.write(&fsm->pool, off, buf, siz, sp);
      }
    }
  } else {
    rc = fsm->pool.write(&fsm->pool, off, buf, siz, sp);
  }
  _fsm_ctrl_unlock(fsm);
  return rc;
}
1491
/* Reads `siz` bytes at `off`. In IWFSM_STRICT mode the whole source range
 * must be allocated, otherwise IWFS_ERROR_FSM_SEGMENTATION is returned. */
static iwrc _fsm_read(struct IWFS_FSM *f, off_t off, void *buf, size_t siz, size_t *sp) {
  FSM_ENSURE_OPEN2(f);
  struct fsm *fsm = f->impl;
  iwrc rc = _fsm_ctrl_rlock(fsm);
  RCRET(rc);
  if (fsm->oflags & IWFSM_STRICT) {
    int allocated = 0;
    /* Verify every block covered by [off, off + siz) is allocated. */
    IWRC(_fsm_is_fully_allocated_lr(fsm, (uint64_t) off >> fsm->bpow,
                                    IW_ROUNDUP(siz, 1ULL << fsm->bpow) >> fsm->bpow,
                                    &allocated), rc);
    if (!rc) {
      if (!allocated) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      } else {
        rc = fsm->pool.read(&fsm->pool, off, buf, siz, sp);
      }
    }
  } else {
    rc = fsm->pool.read(&fsm->pool, off, buf, siz, sp);
  }
  _fsm_ctrl_unlock(fsm);
  return rc;
}
1515
_fsm_sync(struct IWFS_FSM * f,iwfs_sync_flags flags)1516 static iwrc _fsm_sync(struct IWFS_FSM *f, iwfs_sync_flags flags) {
1517 FSM_ENSURE_OPEN2(f);
1518 iwrc rc = _fsm_ctrl_rlock(f->impl);
1519 RCRET(rc);
1520 IWRC(_fsm_write_meta_lw(f->impl), rc);
1521 IWRC(f->impl->pool.sync(&f->impl->pool, flags), rc);
1522 IWRC(_fsm_ctrl_unlock(f->impl), rc);
1523 return rc;
1524 }
1525
/* Closes the file: trims trailing free space (unless disabled), flushes
 * meta, syncs, closes the pool and releases all fsm resources. */
static iwrc _fsm_close(struct IWFS_FSM *f) {
  if (!f || !f->impl) {
    /* Already closed (or never opened): nothing to do. */
    return 0;
  }
  iwrc rc = 0;
  struct fsm *fsm = f->impl;
  IWRC(_fsm_ctrl_wlock(fsm), rc);
  if (fsm->root && (fsm->omode & IWFS_OWRITE)) {
    if (!(fsm->oflags & IWFSM_NO_TRIM_ON_CLOSE)) {
      IWRC(_fsm_trim_tail_lw(fsm), rc);
    }
    IWRC(_fsm_write_meta_lw(fsm), rc);
    if (!fsm->dlsnr) {
      /* No data-change listener attached: sync the pool ourselves. */
      IWRC(fsm->pool.sync(&fsm->pool, IWFS_SYNCDEFAULT), rc);
    }
  }
  IWRC(fsm->pool.close(&fsm->pool), rc);
  /* Free the in-memory free-space tree. */
  _fsm_node_destroy(fsm->root);
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  IWRC(_fsm_destroy_locks(fsm), rc);
  f->impl = 0;
  free(fsm);
  return rc;
}
1550
/* Grows the backing file to at least `size` bytes (lock assumed held). */
IW_INLINE iwrc _fsm_ensure_size_lw(struct fsm *fsm, off_t size) {
  return fsm->pool.ensure_size(&fsm->pool, size);
}
1554
_fsm_ensure_size(struct IWFS_FSM * f,off_t size)1555 static iwrc _fsm_ensure_size(struct IWFS_FSM *f, off_t size) {
1556 FSM_ENSURE_OPEN2(f);
1557 iwrc rc = _fsm_ctrl_rlock(f->impl);
1558 RCRET(rc);
1559 if (f->impl->bmoff + f->impl->bmlen > size) {
1560 rc = IWFS_ERROR_RESIZE_FAIL;
1561 goto finish;
1562 }
1563 rc = _fsm_ensure_size_lw(f->impl, size);
1564
1565 finish:
1566 IWRC(_fsm_ctrl_unlock(f->impl), rc);
1567 return rc;
1568 }
1569
_fsm_add_mmap(struct IWFS_FSM * f,off_t off,size_t maxlen,iwfs_ext_mmap_opts_t opts)1570 static iwrc _fsm_add_mmap(struct IWFS_FSM *f, off_t off, size_t maxlen, iwfs_ext_mmap_opts_t opts) {
1571 FSM_ENSURE_OPEN2(f);
1572 return f->impl->pool.add_mmap(&f->impl->pool, off, maxlen, opts);
1573 }
1574
_fsm_remap_all(struct IWFS_FSM * f)1575 static iwrc _fsm_remap_all(struct IWFS_FSM *f) {
1576 FSM_ENSURE_OPEN2(f);
1577 return f->impl->pool.remap_all(&f->impl->pool);
1578 }
1579
_fsm_acquire_mmap(struct IWFS_FSM * f,off_t off,uint8_t ** mm,size_t * sp)1580 iwrc _fsm_acquire_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1581 return f->impl->pool.acquire_mmap(&f->impl->pool, off, mm, sp);
1582 }
1583
_fsm_release_mmap(struct IWFS_FSM * f)1584 iwrc _fsm_release_mmap(struct IWFS_FSM *f) {
1585 return f->impl->pool.release_mmap(&f->impl->pool);
1586 }
1587
_fsm_probe_mmap(struct IWFS_FSM * f,off_t off,uint8_t ** mm,size_t * sp)1588 static iwrc _fsm_probe_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1589 FSM_ENSURE_OPEN2(f);
1590 return f->impl->pool.probe_mmap(&f->impl->pool, off, mm, sp);
1591 }
1592
_fsm_remove_mmap(struct IWFS_FSM * f,off_t off)1593 static iwrc _fsm_remove_mmap(struct IWFS_FSM *f, off_t off) {
1594 FSM_ENSURE_OPEN2(f);
1595 return f->impl->pool.remove_mmap(&f->impl->pool, off);
1596 }
1597
_fsm_sync_mmap(struct IWFS_FSM * f,off_t off,iwfs_sync_flags flags)1598 static iwrc _fsm_sync_mmap(struct IWFS_FSM *f, off_t off, iwfs_sync_flags flags) {
1599 FSM_ENSURE_OPEN2(f);
1600 return f->impl->pool.sync_mmap(&f->impl->pool, off, flags);
1601 }
1602
/* Public `allocate`: converts the byte length/offset to block units and
 * delegates to `_fsm_blk_allocate_lw` under the control write lock.
 * On input *oaddr serves as a locality hint; on success *oaddr/*olen
 * receive the allocated byte range. */
static iwrc _fsm_allocate(struct IWFS_FSM *f, off_t len, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  uint64_t sbnum, nlen;
  struct fsm *fsm = f->impl;

  *olen = 0;
  if (!(fsm->omode & IWFS_OWRITE)) {
    return IW_ERROR_READONLY;
  }
  if (len <= 0) {
    return IW_ERROR_INVALID_ARGS;
  }
  /* Required blocks number */
  sbnum = (uint64_t) *oaddr >> fsm->bpow; /* desired offset hint in blocks */
  len = IW_ROUNDUP(len, 1ULL << fsm->bpow);

  rc = _fsm_ctrl_wlock(fsm);
  RCRET(rc);
  rc = _fsm_blk_allocate_lw(f->impl, (uint64_t) len >> fsm->bpow, &sbnum, &nlen, opts);
  if (!rc) {
    /* Convert the result back to bytes. */
    *olen = (nlen << fsm->bpow);
    *oaddr = (sbnum << fsm->bpow);
  }
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1630
/* Public `reallocate`: grows or shrinks an existing allocated range.
 * Shrinking frees the tail in place; growing allocates a new range
 * (hinted at the old offset), copies the data if it moved, then frees
 * the old range. */
static iwrc _fsm_reallocate(struct IWFS_FSM *f, off_t nlen, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  struct fsm *fsm = f->impl;

  if (!(fsm->omode & IWFS_OWRITE)) {
    return IW_ERROR_READONLY;
  }
  if ((*oaddr & ((1ULL << fsm->bpow) - 1)) || (*olen & ((1ULL << fsm->bpow) - 1))) {
    /* Both the address and the current length must be block-aligned. */
    return IWFS_ERROR_RANGE_NOT_ALIGNED;
  }
  uint64_t sp;
  uint64_t nlen_blk = IW_ROUNDUP((uint64_t) nlen, 1ULL << fsm->bpow) >> fsm->bpow;
  uint64_t olen_blk = (uint64_t) *olen >> fsm->bpow;
  uint64_t oaddr_blk = (uint64_t) *oaddr >> fsm->bpow;
  uint64_t naddr_blk = oaddr_blk;

  if (nlen_blk == olen_blk) {
    /* Same size in blocks: nothing to do. */
    return 0;
  }
  rc = _fsm_ctrl_wlock(fsm);
  RCRET(rc);
  if (nlen_blk < olen_blk) {
    /* Shrink: free the trailing blocks in place. */
    rc = _fsm_blk_deallocate_lw(fsm, oaddr_blk + nlen_blk, olen_blk - nlen_blk);
    if (!rc) {
      *oaddr = oaddr_blk << fsm->bpow;
      *olen = nlen_blk << fsm->bpow;
    }
  } else {
    /* Grow: allocate, copy if relocated, release the old range. */
    RCC(rc, finish, _fsm_blk_allocate_lw(fsm, nlen_blk, &naddr_blk, &sp, opts));
    if (naddr_blk != oaddr_blk) {
      RCC(rc, finish, fsm->pool.copy(&fsm->pool, *oaddr, (size_t) *olen, naddr_blk << fsm->bpow));
    }
    RCC(rc, finish, _fsm_blk_deallocate_lw(fsm, oaddr_blk, olen_blk));
    *oaddr = naddr_blk << fsm->bpow;
    *olen = sp << fsm->bpow;
  }

finish:
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1673
/* Public `deallocate`: releases a previously allocated byte range.
 * Ranges overlapping the meta header or the free-space bitmap are denied. */
static iwrc _fsm_deallocate(struct IWFS_FSM *f, off_t addr, off_t len) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  struct fsm *fsm = f->impl;
  off_t offset_blk = (uint64_t) addr >> fsm->bpow;
  /* NOTE(review): an unaligned `len` is truncated down to whole blocks —
     confirm callers always pass block-aligned lengths. */
  off_t length_blk = (uint64_t) len >> fsm->bpow;

  if (!(fsm->omode & IWFS_OWRITE)) {
    return IW_ERROR_READONLY;
  }
  if (addr & ((1ULL << fsm->bpow) - 1)) {
    return IWFS_ERROR_RANGE_NOT_ALIGNED;
  }
  rc = _fsm_ctrl_wlock(fsm);
  RCRET(rc);
  if ( IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (fsm->hdrlen >> fsm->bpow))
     || IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (fsm->bmoff >> fsm->bpow),
                          (fsm->bmoff >> fsm->bpow) + (fsm->bmlen >> fsm->bpow))) {
    // Deny deallocations in header or free-space bitmap itself
    IWRC(_fsm_ctrl_unlock(fsm), rc);
    return IWFS_ERROR_FSM_SEGMENTATION;
  }
  rc = _fsm_blk_deallocate_lw(fsm, (uint64_t) offset_blk, (uint64_t) length_blk);
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1700
_fsm_check_allocation_status(struct IWFS_FSM * f,off_t addr,off_t len,bool allocated)1701 static iwrc _fsm_check_allocation_status(struct IWFS_FSM *f, off_t addr, off_t len, bool allocated) {
1702 struct fsm *fsm = f->impl;
1703 if ((addr & ((1ULL << fsm->bpow) - 1)) || (len & ((1ULL << fsm->bpow) - 1))) {
1704 return IWFS_ERROR_RANGE_NOT_ALIGNED;
1705 }
1706 iwrc rc = _fsm_ctrl_rlock(fsm);
1707 RCRET(rc);
1708 off_t offset_blk = (uint64_t) addr >> fsm->bpow;
1709 off_t length_blk = (uint64_t) len >> fsm->bpow;
1710 if ( IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (fsm->hdrlen >> fsm->bpow))
1711 || IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (fsm->bmoff >> fsm->bpow),
1712 (fsm->bmoff >> fsm->bpow) + (fsm->bmlen >> fsm->bpow))) {
1713 IWRC(_fsm_ctrl_unlock(fsm), rc);
1714 return IWFS_ERROR_FSM_SEGMENTATION;
1715 }
1716 rc = _fsm_set_bit_status_lw(fsm, (uint64_t) offset_blk, (uint64_t) length_blk,
1717 allocated ? 0 : 1, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1718 IWRC(_fsm_ctrl_unlock(fsm), rc);
1719 return rc;
1720 }
1721
/* Writes `siz` bytes into the custom header area at header offset `off`. */
static iwrc _fsm_writehdr(struct IWFS_FSM *f, off_t off, const void *buf, off_t siz) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  uint8_t *mm;
  if (siz < 1) {
    return 0;
  }
  struct fsm *fsm = f->impl;
  if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > fsm->hdrlen) {
    /* Write would spill past the reserved header area. */
    return IW_ERROR_OUT_OF_BOUNDS;
  }
  rc = fsm->pool.acquire_mmap(&fsm->pool, 0, &mm, 0);
  if (!rc) {
    if (fsm->dlsnr) {
      /* Notify the data-change listener before mutating the mmaped header.
         NOTE(review): the memmove below still runs if onwrite failed;
         the error is merged into the final rc — confirm intended. */
      rc = fsm->dlsnr->onwrite(fsm->dlsnr, IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, siz, 0);
    }
    memmove(mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, (size_t) siz);
    IWRC(fsm->pool.release_mmap(&fsm->pool), rc);
  }
  return rc;
}
1743
_fsm_readhdr(struct IWFS_FSM * f,off_t off,void * buf,off_t siz)1744 static iwrc _fsm_readhdr(struct IWFS_FSM *f, off_t off, void *buf, off_t siz) {
1745 FSM_ENSURE_OPEN2(f);
1746 iwrc rc;
1747 uint8_t *mm;
1748 if (siz < 1) {
1749 return 0;
1750 }
1751 struct fsm *fsm = f->impl;
1752 if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > fsm->hdrlen) {
1753 return IW_ERROR_OUT_OF_BOUNDS;
1754 }
1755 rc = fsm->pool.acquire_mmap(&fsm->pool, 0, &mm, 0);
1756 if (!rc) {
1757 memmove(buf, mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, (size_t) siz);
1758 rc = fsm->pool.release_mmap(&fsm->pool);
1759 }
1760 return rc;
1761 }
1762
/* Resets the free-space map to its initial (empty) state, optionally
 * trimming trailing free space when IWFSM_CLEAR_TRIM is set. */
static iwrc _fsm_clear(struct IWFS_FSM *f, iwfs_fsm_clrfalgs clrflags) {
  FSM_ENSURE_OPEN2(f);
  struct fsm *fsm = f->impl;
  uint64_t bmoff, bmlen;
  iwrc rc = _fsm_ctrl_wlock(fsm);
  /* NOTE(review): a failed wlock is merged into `rc` at `finish` rather
     than returned immediately — confirm intended. */
  bmlen = fsm->bmlen;
  if (!bmlen) {
    /* Bitmap was never initialized: nothing to clear. */
    goto finish;
  }
  if (!fsm->mmap_all && fsm->bmoff) {
    IWRC(fsm->pool.remove_mmap(&fsm->pool, fsm->bmoff), rc);
  }
  /* Place the fresh bitmap right after the header. */
  bmoff = IW_ROUNDUP(fsm->hdrlen, fsm->aunit);
  if (!fsm->mmap_all) {
    IWRC(fsm->pool.add_mmap(&fsm->pool, bmoff, bmlen, fsm->mmap_opts), rc);
  }
  RCGO(rc, finish);
  /* Zeroing bmlen/bmoff forces `_fsm_init_lw` to treat this as first-time init. */
  fsm->bmlen = 0;
  fsm->bmoff = 0;
  rc = _fsm_init_lw(fsm, bmoff, bmlen);
  if (!rc && (clrflags & IWFSM_CLEAR_TRIM)) {
    rc = _fsm_trim_tail_lw(fsm);
  }

finish:
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1791
/* Exposes the underlying IWFS_EXT pool of this file. */
static iwrc _fsm_extfile(struct IWFS_FSM *f, IWFS_EXT **ext) {
  FSM_ENSURE_OPEN2(f);
  *ext = &f->impl->pool;
  return 0;
}
1797
/* Fills `state` with a snapshot of file and allocator statistics. */
static iwrc _fsm_state(struct IWFS_FSM *f, IWFS_FSM_STATE *state) {
  FSM_ENSURE_OPEN2(f);
  struct fsm *fsm = f->impl;
  iwrc rc = _fsm_ctrl_rlock(fsm);
  memset(state, 0, sizeof(*state));
  IWRC(fsm->pool.state(&fsm->pool, &state->exfile), rc);
  state->block_size = 1U << fsm->bpow;
  state->oflags = fsm->oflags;
  state->hdrlen = fsm->hdrlen;
  state->blocks_num = fsm->bmlen << 3; /* each bitmap byte tracks 8 blocks */
  state->free_segments_num = fsm->fsmnum;
  state->avg_alloc_size = fsm->crznum > 0 ? (double_t) fsm->crzsum / (double_t) fsm->crznum : 0;
  state->alloc_dispersion = fsm->crznum > 0 ? (double_t) fsm->crzvar / (double_t) fsm->crznum : 0;
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1814
/* Opens (or creates) a free-space-map file and wires the IWFS_FSM vtable.
 * On any failure the partially constructed state is torn down via `_fsm_close`. */
iwrc iwfs_fsmfile_open(IWFS_FSM *f, const IWFS_FSM_OPTS *opts) {
  assert(f && opts);
  iwrc rc = 0;
  IWFS_EXT_STATE fstate = { 0 };
  const char *path = opts->exfile.file.path;

  memset(f, 0, sizeof(*f));
  RCC(rc, finish, iwfs_fsmfile_init());

  /* Wire the public interface. */
  f->write = _fsm_write;
  f->read = _fsm_read;
  f->close = _fsm_close;
  f->sync = _fsm_sync;
  f->state = _fsm_state;

  f->ensure_size = _fsm_ensure_size;
  f->add_mmap = _fsm_add_mmap;
  f->remap_all = _fsm_remap_all;
  f->acquire_mmap = _fsm_acquire_mmap;
  f->probe_mmap = _fsm_probe_mmap;
  f->release_mmap = _fsm_release_mmap;
  f->remove_mmap = _fsm_remove_mmap;
  f->sync_mmap = _fsm_sync_mmap;

  f->allocate = _fsm_allocate;
  f->reallocate = _fsm_reallocate;
  f->deallocate = _fsm_deallocate;
  f->check_allocation_status = _fsm_check_allocation_status;
  f->writehdr = _fsm_writehdr;
  f->readhdr = _fsm_readhdr;
  f->clear = _fsm_clear;
  f->extfile = _fsm_extfile;

  if (!path) {
    return IW_ERROR_INVALID_ARGS;
  }
  struct fsm *fsm = f->impl = calloc(1, sizeof(*f->impl));
  if (!fsm) {
    return iwrc_set_errno(IW_ERROR_ALLOC, errno);
  }
  fsm->f = f;
  fsm->dlsnr = opts->exfile.file.dlsnr; // Copy data changes listener address
  fsm->mmap_opts = opts->mmap_opts;

  /* The underlying exfile manages its own locks only when fsm locking is enabled. */
  IWFS_EXT_OPTS rwl_opts = opts->exfile;
  rwl_opts.use_locks = !(opts->oflags & IWFSM_NOLOCKS);

  RCC(rc, finish, _fsm_init_impl(fsm, opts));
  RCC(rc, finish, _fsm_init_locks(fsm, opts));
  RCC(rc, finish, iwfs_exfile_open(&fsm->pool, &rwl_opts));
  RCC(rc, finish, fsm->pool.state(&fsm->pool, &fstate));

  fsm->omode = fstate.file.opts.omode;

  /* Fresh files get a new layout; existing files are validated and loaded. */
  if (fstate.file.ostatus & IWFS_OPEN_NEW) {
    rc = _fsm_init_new_lw(fsm, opts);
  } else {
    rc = _fsm_init_existing_lw(fsm);
  }

finish:
  if (rc) {
    if (f->impl) {
      IWRC(_fsm_destroy_locks(f->impl), rc); // we are not locked
      IWRC(_fsm_close(f), rc);
    }
  }
  return rc;
}
1884
/* Maps fsm-specific error codes to human readable messages;
 * returns 0 for codes outside this module's range. */
static const char* _fsmfile_ecodefn(locale_t locale, uint32_t ecode) {
  if (ecode <= _IWFS_FSM_ERROR_START || ecode >= _IWFS_FSM_ERROR_END) {
    return 0;
  }
  switch (ecode) {
    case IWFS_ERROR_NO_FREE_SPACE:
      return "No free space. (IWFS_ERROR_NO_FREE_SPACE)";
    case IWFS_ERROR_INVALID_BLOCK_SIZE:
      return "Invalid block size specified. (IWFS_ERROR_INVALID_BLOCK_SIZE)";
    case IWFS_ERROR_RANGE_NOT_ALIGNED:
      return "Specified range/offset is not aligned with page/block. "
             "(IWFS_ERROR_RANGE_NOT_ALIGNED)";
    case IWFS_ERROR_FSM_SEGMENTATION:
      return "Free-space map segmentation error. (IWFS_ERROR_FSM_SEGMENTATION)";
    case IWFS_ERROR_INVALID_FILEMETA:
      return "Invalid file metadata. (IWFS_ERROR_INVALID_FILEMETA)";
    case IWFS_ERROR_PLATFORM_PAGE:
      return "The block size incompatible with platform page size, data "
             "migration required. (IWFS_ERROR_PLATFORM_PAGE)";
    case IWFS_ERROR_RESIZE_FAIL:
      return "Failed to resize file, "
             "conflicting with free-space map location (IWFS_ERROR_RESIZE_FAIL)";
    default:
      return 0;
  }
}
1912
/* One-time module initialization: registers the fsm error-code resolver.
 * Safe to call repeatedly; only the first caller performs registration. */
iwrc iwfs_fsmfile_init(void) {
  static int _fsmfile_initialized = 0;
  iwrc rc = iw_init();
  RCRET(rc);
  /* Atomic CAS guards against concurrent double registration. */
  if (!__sync_bool_compare_and_swap(&_fsmfile_initialized, 0, 1)) {
    return 0; // initialized already
  }
  return iwlog_register_ecodefn(_fsmfile_ecodefn);
}
1922
1923 /*************************************************************************************************
1924 * Debug API *
1925 *************************************************************************************************/
1926
iwfs_fsmdbg_number_of_free_areas(IWFS_FSM * f)1927 uint64_t iwfs_fsmdbg_number_of_free_areas(IWFS_FSM *f) {
1928 struct fsm *fsm = f->impl;
1929 return fsm->fsmnum;
1930 }
1931
/* Debug: exposes the internal `_fsm_find_next_set_bit` helper. */
uint64_t iwfs_fsmdbg_find_next_set_bit(
  const uint64_t *addr, uint64_t offset_bit, uint64_t max_offset_bit,
  int *found
  ) {
  return _fsm_find_next_set_bit(addr, offset_bit, max_offset_bit, found);
}
1938
/* Debug: exposes the internal `_fsm_find_prev_set_bit` helper. */
uint64_t iwfs_fsmdbg_find_prev_set_bit(
  const uint64_t *addr, uint64_t offset_bit, uint64_t min_offset_bit,
  int *found
  ) {
  return _fsm_find_prev_set_bit(addr, offset_bit, min_offset_bit, found);
}
1945
/* Debug: prints every [offset length] free segment (in blocks) to stderr. */
void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr) {
  assert(f);
  struct fsm *fsm = f->impl;
  fprintf(stderr, "FSM TREE: %s\n", hdr);
  if (!fsm->root) {
    fprintf(stderr, "NONE\n");
    return;
  }
  struct iwavl_node *node = iwavl_first_in_order(fsm->root);
  while (node) {
    struct bkey *key = &BKEY(node);
    fprintf(stderr, "[%" PRIu64 " %" PRIu64 "]\n",
            (uint64_t) FSMBK_OFFSET(key), (uint64_t) FSMBK_LENGTH(key));
    node = iwavl_next_in_order(node);
  }
}
1961
/**
 * Renders the low 8 bits of @a x as a string of '0'/'1' characters,
 * least-significant bit FIRST (bit 0 is the leftmost character) — this
 * matches the block order used when dumping the FSM bitmap.
 *
 * @param x Byte value; only the low 8 bits are inspected.
 * @return  Pointer to a static buffer, overwritten on every call.
 *          NOT thread-safe — debug use only.
 */
const char* byte_to_binary(int x) {
  static char b[9];
  // Direct indexed writes instead of repeated strcat(): the old loop
  // rescanned the buffer on every append and silently depended on the
  // b[0] = '\0' reset; this form is O(8) and self-contained.
  for (int i = 0; i < 8; ++i) {
    b[i] = (x & (1 << i)) ? '1' : '0';
  }
  b[8] = '\0';
  return b;
}
1971
iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM * f)1972 iwrc iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM *f) {
1973 assert(f);
1974 size_t sp;
1975 uint8_t *mm;
1976 struct fsm *fsm = f->impl;
1977 iwrc rc;
1978 if (fsm->mmap_all) {
1979 rc = fsm->pool.probe_mmap(&fsm->pool, 0, &mm, &sp);
1980 if (!rc) {
1981 if (sp <= fsm->bmoff) {
1982 rc = IWFS_ERROR_NOT_MMAPED;
1983 } else {
1984 mm += fsm->bmoff;
1985 sp = sp - fsm->bmoff;
1986 }
1987 }
1988 } else {
1989 rc = fsm->pool.probe_mmap(&fsm->pool, fsm->bmoff, &mm, &sp);
1990 }
1991 if (rc) {
1992 iwlog_ecode_error3(rc);
1993 return rc;
1994 }
1995 int i = ((fsm->hdrlen >> fsm->bpow) >> 3);
1996 // if (impl->bmoff == impl->aunit) {
1997 // i += ((impl->bmlen >> impl->bpow) >> 3);
1998 // }
1999 for ( ; i < sp && i < fsm->bmlen; ++i) {
2000 uint8_t b = *(mm + i);
2001 fprintf(stderr, "%s", byte_to_binary(b));
2002 }
2003 printf("\n");
2004 return 0;
2005 }
2006
iwfs_fsmdbg_state(IWFS_FSM * f,IWFS_FSMDBG_STATE * d)2007 iwrc iwfs_fsmdbg_state(IWFS_FSM *f, IWFS_FSMDBG_STATE *d) {
2008 FSM_ENSURE_OPEN2(f);
2009 struct fsm *fsm = f->impl;
2010 iwrc rc = _fsm_ctrl_rlock(fsm);
2011 memset(d, 0, sizeof(*d));
2012 IWRC(fsm->pool.state(&fsm->pool, &d->state.exfile), rc);
2013 d->state.block_size = 1U << fsm->bpow;
2014 d->state.oflags = fsm->oflags;
2015 d->state.hdrlen = fsm->hdrlen;
2016 d->state.blocks_num = fsm->bmlen << 3;
2017 d->state.free_segments_num = fsm->fsmnum;
2018 d->state.avg_alloc_size = fsm->crznum > 0 ? (double_t) fsm->crzsum / (double_t) fsm->crznum : 0;
2019 d->state.alloc_dispersion = fsm->crznum > 0 ? (double_t) fsm->crzvar / (double_t) fsm->crznum : 0;
2020 d->bmoff = fsm->bmoff;
2021 d->bmlen = fsm->bmlen;
2022 d->lfbkoff = fsm->lfbkoff;
2023 d->lfbklen = fsm->lfbklen;
2024 IWRC(_fsm_ctrl_unlock(fsm), rc);
2025 return rc;
2026 }
2027