• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************************************
2  * IOWOW library
3  *
4  * MIT License
5  *
6  * Copyright (c) 2012-2022 Softmotions Ltd <info@softmotions.com>
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in all
16  * copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *************************************************************************************************/
26 
27 #include "iwfsmfile.h"
28 #include "iwavl.h"
29 #include "iwbits.h"
30 #include "iwlog.h"
31 #include "iwp.h"
32 #include "iwutils.h"
33 #include "iwcfg.h"
34 
35 #include <pthread.h>
36 
37 void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr);
38 
/**
 * Free-space blocks-tree key.
 * Chunks are ordered by length first, then by offset (see `_fsm_cmp_key`).
 */
struct bkey {
  uint32_t off; /**< Chunk offset in blocks. */
  uint32_t len; /**< Chunk length in blocks. */
};
46 
/** Free-space tree entry: a key plus its intrusive AVL node. */
struct bkey_node {
  struct bkey       key;  /**< Free-space chunk key. */
  struct iwavl_node node; /**< Intrusive AVL tree node (see `BKEY` accessor). */
};
51 
52 #define BKEY(nptr_) iwavl_entry(nptr_, struct bkey_node, node)->key
53 
54 /** Additional options for `_fsm_set_bit_status_lw` routine */
55 typedef uint8_t fsm_bmopts_t;
56 
57 /** No options. */
58 #define FSM_BM_NONE ((fsm_bmopts_t) 0x00U)
59 
60 /** Do not modify bitmap. */
61 #define FSM_BM_DRY_RUN ((fsm_bmopts_t) 0x01U)
62 
63 /** Perform strict checking of bitmap consistency */
64 #define FSM_BM_STRICT ((fsm_bmopts_t) 0x02U)
65 
66 /* Maximum size of block: 1Mb */
67 #define FSM_MAX_BLOCK_POW 20
68 
69 /* Maximum number of records used in allocation statistics */
70 #define FSM_MAX_STATS_COUNT 0x0000ffff
71 
72 #define FSM_ENSURE_OPEN(impl_)                                                                          \
73   if (!(impl_) || !(impl_)->f) return IW_ERROR_INVALID_STATE;
74 
75 #define FSM_ENSURE_OPEN2(f_)                                                                             \
76   if (!(f_) || !(f_)->impl) return IW_ERROR_INVALID_STATE;
77 
78 #define FSMBK_OFFSET(b_) ((b_)->off)
79 
80 #define FSMBK_LENGTH(b_) ((b_)->len)
81 
82 ////////////////////////////////////////////////////////////////////////////////////////////////////
83 
/** Free-space manager state over an extensible file pool. */
struct fsm {
  IWFS_EXT  pool;                 /**< Underlying rwl file. */
  uint64_t  bmlen;                /**< Free-space bitmap block length in bytes. */
  uint64_t  bmoff;                /**< Free-space bitmap block offset in bytes. */
  uint64_t  lfbkoff;              /**< Offset in blocks of free block chunk with the largest offset. */
  uint64_t  lfbklen;              /**< Length in blocks of free block chunk with the largest offset. */
  uint64_t  crzsum;               /**< Cumulative sum all allocated blocks */
  uint64_t  crzvar;               /**< Record sizes standard variance (deviation^2 * N) */
  uint32_t  hdrlen;               /**< Length of custom file header */
  uint32_t  crznum;               /**< Number of all allocated continuous areas acquired by `allocate` */
  uint32_t  fsmnum;               /**< Number of records in fsm */
  IWFS_FSM *f;                    /**< Self reference. */
  IWDLSNR  *dlsnr;                /**< Data events listener */
  struct iwavl_node *root;        /**< Free-space tree */
  pthread_rwlock_t  *ctlrwlk;     /**< Methods RW lock. May be zero (locking disabled). */
  size_t aunit;                   /**< System allocation unit size.
                                       - Page size on *NIX
                                       - Minimal allocation unit for WIN32 */
  iwfs_fsm_openflags oflags;      /**< Operation mode flags. */
  iwfs_omode omode;               /**< Open mode. */
  uint8_t    bpow;                /**< Block size power for 2 */
  bool       mmap_all;            /**< Mmap all file data */
  iwfs_ext_mmap_opts_t mmap_opts; /**< Default mmap options used in `add_mmap` */
};
108 
109 static iwrc _fsm_ensure_size_lw(struct fsm *fsm, off_t size);
110 
111 ////////////////////////////////////////////////////////////////////////////////////////////////////
112 
_fsm_cmp_key(const struct bkey * a,const struct bkey * b)113 IW_INLINE int _fsm_cmp_key(const struct bkey *a, const struct bkey *b) {
114   int ret = ((FSMBK_LENGTH(b) < FSMBK_LENGTH(a)) - (FSMBK_LENGTH(a) < FSMBK_LENGTH(b)));
115   if (ret) {
116     return ret;
117   } else {
118     return ((FSMBK_OFFSET(b) < FSMBK_OFFSET(a)) - (FSMBK_OFFSET(a) < FSMBK_OFFSET(b)));
119   }
120 }
121 
_fsm_cmp_node(const struct iwavl_node * an,const struct iwavl_node * bn)122 IW_INLINE int _fsm_cmp_node(const struct iwavl_node *an, const struct iwavl_node *bn) {
123   const struct bkey *ak = &BKEY(an);
124   const struct bkey *bk = &BKEY(bn);
125   return _fsm_cmp_key(ak, bk);
126 }
127 
_fsm_cmp_ctx(const void * ctx,const struct iwavl_node * bn)128 IW_INLINE int _fsm_cmp_ctx(const void *ctx, const struct iwavl_node *bn) {
129   const struct bkey *ak = ctx;
130   const struct bkey *bk = &BKEY(bn);
131   return _fsm_cmp_key(ak, bk);
132 }
133 
_fsm_ctrl_wlock(struct fsm * fsm)134 IW_INLINE iwrc _fsm_ctrl_wlock(struct fsm *fsm) {
135   int rci = fsm->ctlrwlk ? pthread_rwlock_wrlock(fsm->ctlrwlk) : 0;
136   return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
137 }
138 
_fsm_ctrl_rlock(struct fsm * fsm)139 IW_INLINE iwrc _fsm_ctrl_rlock(struct fsm *fsm) {
140   int rci = fsm->ctlrwlk ? pthread_rwlock_rdlock(fsm->ctlrwlk) : 0;
141   return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
142 }
143 
_fsm_ctrl_unlock(struct fsm * fsm)144 IW_INLINE iwrc _fsm_ctrl_unlock(struct fsm *fsm) {
145   int rci = fsm->ctlrwlk ? pthread_rwlock_unlock(fsm->ctlrwlk) : 0;
146   return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
147 }
148 
_fsm_bmptr(struct fsm * fsm,uint64_t ** bmptr)149 IW_INLINE iwrc _fsm_bmptr(struct fsm *fsm, uint64_t **bmptr) {
150   size_t sp;
151   uint8_t *mm;
152   *bmptr = 0;
153   // get mmap pointer without locked
154   iwrc rc = fsm->pool.probe_mmap(&fsm->pool, fsm->mmap_all ? 0 : fsm->bmoff, &mm, &sp);
155   RCRET(rc);
156   if (fsm->mmap_all) {
157     if (sp < fsm->bmoff + fsm->bmlen) {
158       return IWFS_ERROR_NOT_MMAPED;
159     }
160     *bmptr = (uint64_t*) (mm + fsm->bmoff);
161   } else {
162     if (sp < fsm->bmlen) {
163       return IWFS_ERROR_NOT_MMAPED;
164     }
165     *bmptr = (uint64_t*) mm;
166   }
167   return 0;
168 }
169 
_fsm_init_bkey_node(struct bkey_node * n,uint64_t offset_blk,uint64_t len_blk)170 IW_INLINE WUR iwrc _fsm_init_bkey_node(struct bkey_node *n, uint64_t offset_blk, uint64_t len_blk) {
171   if (offset_blk > (uint32_t) -1 || len_blk > (uint32_t) -1) {
172     return IW_ERROR_OVERFLOW;
173   }
174   n->key.off = (uint32_t) offset_blk;
175   n->key.len = (uint32_t) len_blk;
176   return 0;
177 }
178 
_fsm_init_bkey(struct bkey * k,uint64_t offset_blk,uint64_t len_blk)179 IW_INLINE iwrc _fsm_init_bkey(struct bkey *k, uint64_t offset_blk, uint64_t len_blk) {
180   if (offset_blk > (uint32_t) -1 || len_blk > (uint32_t) -1) {
181     return IW_ERROR_OVERFLOW;
182   }
183   k->off = (uint32_t) offset_blk;
184   k->len = (uint32_t) len_blk;
185   return 0;
186 }
187 
_fsm_del_fbk2(struct fsm * fsm,struct iwavl_node * n)188 IW_INLINE void _fsm_del_fbk2(struct fsm *fsm, struct iwavl_node *n) {
189   iwavl_remove(&fsm->root, n), --fsm->fsmnum;
190   struct bkey_node *bk = iwavl_entry(n, struct bkey_node, node);
191   if (bk->key.off == fsm->lfbkoff) {
192     fsm->lfbkoff = 0;
193     fsm->lfbklen = 0;
194   }
195   free(bk);
196 }
197 
_fsm_del_fbk(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk)198 IW_INLINE void _fsm_del_fbk(struct fsm *fsm, uint64_t offset_blk, uint64_t length_blk) {
199   struct bkey bkey;
200   if (!_fsm_init_bkey(&bkey, offset_blk, length_blk)) {
201     struct iwavl_node *n = iwavl_lookup(fsm->root, &bkey, _fsm_cmp_ctx);
202     assert(n);
203     if (n) {
204       _fsm_del_fbk2(fsm, n);
205     }
206   }
207 }
208 
_fsm_put_fbk(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk)209 IW_INLINE iwrc _fsm_put_fbk(struct fsm *fsm, uint64_t offset_blk, uint64_t length_blk) {
210   iwrc rc = 0;
211   struct bkey_node *bk;
212   RCB(finish, bk = malloc(sizeof(*bk)));
213   RCC(rc, finish, _fsm_init_bkey_node(bk, offset_blk, length_blk));
214   if (iwavl_insert(&fsm->root, &bk->node, _fsm_cmp_node)) {
215     free(bk);
216   } else {
217     ++fsm->fsmnum;
218     if (offset_blk + length_blk >= fsm->lfbkoff + fsm->lfbklen) {
219       fsm->lfbkoff = offset_blk;
220       fsm->lfbklen = length_blk;
221     }
222   }
223 finish:
224   if (rc) {
225     free(bk);
226   }
227   return rc;
228 }
229 
_fsm_find_matching_fblock_lw(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk,iwfs_fsm_aflags opts)230 IW_INLINE const struct iwavl_node* _fsm_find_matching_fblock_lw(
231   struct fsm     *fsm,
232   uint64_t        offset_blk,
233   uint64_t        length_blk,
234   iwfs_fsm_aflags opts
235   ) {
236   struct bkey bk;
237   const struct iwavl_node *ub, *lb;
238   if (_fsm_init_bkey(&bk, offset_blk, length_blk)) {
239     return 0;
240   }
241 
242   iwavl_lookup_bounds(fsm->root, &bk, _fsm_cmp_ctx, &lb, &ub);
243 
244   struct bkey *uk = ub ? &BKEY(ub) : 0;
245   struct bkey *lk = lb ? &BKEY(lb) : 0;
246 
247   uint64_t lklength = lk ? FSMBK_LENGTH(lk) : 0;
248   uint64_t uklength = uk ? FSMBK_LENGTH(uk) : 0;
249 
250   if (lklength == length_blk) {
251     return lb;
252   } else if (uklength == length_blk) {
253     return ub;
254   }
255   if (lklength > length_blk) {
256     return lb;
257   } else if (uklength > length_blk) {
258     return ub;
259   }
260   return 0;
261 }
262 
/**
 * @brief Set the allocation bits in the fsm bitmap.
 *
 * With `FSM_BM_STRICT` the routine also verifies that every touched bit
 * actually flips (setting an already-set bit, or clearing an already-clear
 * one, reports `IWFS_ERROR_FSM_SEGMENTATION`). With `FSM_BM_DRY_RUN` the
 * bitmap is checked but left unmodified.
 *
 * @param fsm
 * @param offset_bits Bit offset in the bitmap.
 * @param length_bits Number of bits to set
 * @param bit_status  If `1` bits will be set to `1` otherwise `0`
 * @param opts        Operation options
 */
static iwrc _fsm_set_bit_status_lw(
  struct fsm        *fsm,
  const uint64_t     offset_bits,
  const uint64_t     length_bits_,
  const int          bit_status,
  const fsm_bmopts_t opts
  ) {
  iwrc rc;
  size_t sp;
  uint8_t *mm;
  register int64_t length_bits = length_bits_;
  register uint64_t *p, set_mask;
  uint64_t bend = offset_bits + length_bits;
  int set_bits;

  if (bend < offset_bits) { // overflow
    return IW_ERROR_OUT_OF_BOUNDS;
  }
  assert(fsm->bmlen * 8 >= offset_bits + length_bits);
  if (fsm->bmlen * 8 < offset_bits + length_bits) {
    return IWFS_ERROR_FSM_SEGMENTATION;
  }
  if (fsm->mmap_all) {
    // Whole file mapped as one region: bitmap is at `bmoff` within it.
    rc = fsm->pool.probe_mmap(&fsm->pool, 0, &mm, &sp);
    RCRET(rc);
    if (sp < fsm->bmoff + fsm->bmlen) {
      return IWFS_ERROR_NOT_MMAPED;
    } else {
      mm += fsm->bmoff;
    }
  } else {
    // Bitmap has its own dedicated mapping.
    rc = fsm->pool.probe_mmap(&fsm->pool, fsm->bmoff, &mm, &sp);
    RCRET(rc);
    if (sp < fsm->bmlen) {
      return IWFS_ERROR_NOT_MMAPED;
    }
  }
  // First 64-bit word touched and the mask of bits from `offset_bits`
  // to the end of that word.
  p = ((uint64_t*) mm) + offset_bits / 64;
  set_bits = 64 - (offset_bits & (64 - 1)); // NOLINT
  set_mask = (~((uint64_t) 0) << (offset_bits & (64 - 1)));

#ifdef IW_BIGENDIAN
  // Bitmap words are kept in little-endian order on disk; convert each
  // word to host order before testing/updating, and back before storing.
  while (length_bits - set_bits >= 0) {
    uint64_t pv = *p;
    pv = IW_ITOHLL(pv);
    if (bit_status) {
      if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        pv |= set_mask;
        *p = IW_HTOILL(pv);
      }
    } else {
      if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        pv &= ~set_mask;
        *p = IW_HTOILL(pv);
      }
    }
    length_bits -= set_bits;
    set_bits = 64;
    set_mask = ~((uint64_t) 0);
    ++p;
  }
  if (length_bits) {
    // Tail: restrict the mask to the remaining bits of the last word.
    uint64_t pv = *p;
    pv = IW_ITOHLL(pv);
    set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
    if (bit_status) {
      if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        pv |= set_mask;
        *p = IW_HTOILL(pv);
      }
    } else {
      if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        pv &= ~set_mask;
        *p = IW_HTOILL(pv);
      }
    }
  }
#else
  // Little-endian host: operate on the mapped words directly.
  while (length_bits - set_bits >= 0) {
    if (bit_status) {
      if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        *p |= set_mask;
      }
    } else {
      if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        *p &= ~set_mask;
      }
    }
    length_bits -= set_bits;
    set_bits = 64;
    set_mask = ~((uint64_t) 0);
    ++p;
  }
  if (length_bits) {
    // Tail: restrict the mask to the remaining bits of the last word.
    set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
    if (bit_status) {
      if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        *p |= set_mask;
      }
    } else {
      if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
        rc = IWFS_ERROR_FSM_SEGMENTATION;
      }
      if ((opts & FSM_BM_DRY_RUN) == 0) {
        *p &= ~set_mask;
      }
    }
  }
#endif
  if (!rc && fsm->dlsnr) {
    // Report the modified byte range of the bitmap to the data listener.
    uint64_t so = offset_bits / 8;
    uint64_t lb = length_bits_ + offset_bits % 8;
    uint64_t dl = lb / 8;
    if (lb % 8) {
      ++dl;
    }
    rc = fsm->dlsnr->onwrite(fsm->dlsnr, fsm->bmoff + so, mm + so, dl, 0);
  }
  return rc;
}
413 
/**
 *  @brief Allocate a continuous segment of blocks with page aligned offset.
 *
 *  @param fsm `struct fsm`
 *  @param length_blk Desired segment length in blocks.
 *  @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
 *                  It also specifies the desired segment offset to provide
 *                  allocation locality.
 *  @param [out] olength_blk Assigned segment length in blocks.
 *  @param  max_offset_blk Maximal offset of allocated block.
 *  @param opts Allocation options.
 */
static iwrc _fsm_blk_allocate_aligned_lw(
  struct fsm           *fsm,
  const uint64_t        length_blk,
  uint64_t             *offset_blk,
  uint64_t             *olength_blk,
  const uint64_t        max_offset_blk,
  const iwfs_fsm_aflags opts
  ) {
  fsm_bmopts_t bopts = FSM_BM_NONE;
  size_t aunit_blk = (fsm->aunit >> fsm->bpow); // allocation unit in blocks
  assert(fsm && length_blk > 0);
  if (fsm->oflags & IWFSM_STRICT) {
    bopts |= FSM_BM_STRICT;
  }
  *olength_blk = 0;
  *offset_blk = 0;

  /* First attempt: look for a chunk with extra room for alignment padding,
     falling back to an exact-size chunk. */
  const struct iwavl_node *nn = _fsm_find_matching_fblock_lw(fsm, 0, length_blk + aunit_blk, opts);
  if (!nn) {
    nn = _fsm_find_matching_fblock_lw(fsm, 0, length_blk, opts);
    if (!nn) {
      return IWFS_ERROR_NO_FREE_SPACE;
    }
  }

  struct bkey *nk = &BKEY(nn);
  uint64_t akoff = FSMBK_OFFSET(nk);
  uint64_t aklen = FSMBK_LENGTH(nk);
  uint64_t noff = IW_ROUNDUP(akoff, aunit_blk); // aligned start inside the chunk

  if ((noff <= max_offset_blk) && (noff < aklen + akoff) && (aklen - (noff - akoff) >= length_blk)) {
    // The chunk fits after alignment: carve it, returning the leading
    // padding and trailing remainder back to the tree.
    _fsm_del_fbk(fsm, akoff, aklen);
    aklen = aklen - (noff - akoff);
    if (noff > akoff) {
      _fsm_put_fbk(fsm, akoff, noff - akoff);
    }
    if (aklen > length_blk) {
      _fsm_put_fbk(fsm, noff + length_blk, aklen - length_blk);
    }
    *offset_blk = noff;
    *olength_blk = length_blk;
    return _fsm_set_bit_status_lw(fsm, noff, length_blk, 1, bopts);
  }

  aklen = 0;
  akoff = UINT64_MAX;

  // full scan: pick the suitable chunk with the lowest offset.
  for (struct iwavl_node *n = iwavl_first_in_order(fsm->root); n; n = iwavl_next_in_order(n)) {
    struct bkey *k = &BKEY(n);
    uint64_t koff = FSMBK_OFFSET(k);
    uint64_t klen = FSMBK_LENGTH(k);
    if (koff < akoff) {
      noff = IW_ROUNDUP(koff, aunit_blk);
      if (noff <= max_offset_blk && (noff < klen + koff) && (klen - (noff - koff) >= length_blk)) {
        akoff = koff;
        aklen = klen;
      }
    }
  }

  if (akoff == UINT64_MAX) {
    return IWFS_ERROR_NO_FREE_SPACE;
  }
  // Same carving as above for the chunk found by the scan.
  _fsm_del_fbk(fsm, akoff, aklen);
  noff = IW_ROUNDUP(akoff, aunit_blk);
  aklen = aklen - (noff - akoff);
  if (noff > akoff) {
    _fsm_put_fbk(fsm, akoff, noff - akoff);
  }
  if (aklen > length_blk) {
    _fsm_put_fbk(fsm, noff + length_blk, aklen - length_blk);
  }
  *offset_blk = noff;
  *olength_blk = length_blk;
  return _fsm_set_bit_status_lw(fsm, noff, length_blk, 1, bopts);
}
504 
/** Dispose every entry of the free-space tree rooted at `root`. */
static void _fsm_node_destroy(struct iwavl_node *root) {
  // Post-order traversal: the parent pointer is captured before the node
  // is freed, so iteration can continue safely after free().
  for (struct iwavl_node *n = iwavl_first_in_postorder(root), *p;
       n && (p = iwavl_get_parent(n), 1);
       n = iwavl_next_in_postorder(n, p)) {
    struct bkey_node *bk = iwavl_entry(n, struct bkey_node, node);
    free(bk);
  }
}
513 
514 /**
515  * @brief Load existing bitmap area into free-space search tree.
516  * @param fsm  `struct fsm`
517  * @param bm    Bitmap area start ptr
518  * @param len   Bitmap area length in bytes.
519  */
_fsm_load_fsm_lw(struct fsm * fsm,const uint8_t * bm,uint64_t len)520 static void _fsm_load_fsm_lw(struct fsm *fsm, const uint8_t *bm, uint64_t len) {
521   uint64_t cbnum = 0, fbklength = 0, fbkoffset = 0;
522 
523   _fsm_node_destroy(fsm->root);
524   fsm->root = 0;
525   fsm->fsmnum = 0;
526 
527   for (uint64_t b = 0; b < len; ++b) {
528     register uint8_t bb = bm[b];
529     if (bb == 0) {
530       fbklength += 8;
531       cbnum += 8;
532     } else if (bb == 0xffU) {
533       if (fbklength) {
534         fbkoffset = cbnum - fbklength;
535         _fsm_put_fbk(fsm, fbkoffset, fbklength);
536         fbklength = 0;
537       }
538       cbnum += 8;
539     } else {
540       for (int i = 0; i < 8; ++i, ++cbnum) {
541         if (bb & (1U << i)) {
542           if (fbklength) {
543             fbkoffset = cbnum - fbklength;
544             _fsm_put_fbk(fsm, fbkoffset, fbklength);
545             fbklength = 0;
546           }
547         } else {
548           ++fbklength;
549         }
550       }
551     }
552   }
553   if (fbklength > 0) {
554     fbkoffset = len * 8 - fbklength;
555     _fsm_put_fbk(fsm, fbkoffset, fbklength);
556   }
557 }
558 
559 /**
560  * @brief Flush a current `iwfsmfile` metadata into the file header.
561  * @param fsm
562  * @param is_sync If `1` perform mmap sync.
563  * @return
564  */
_fsm_write_meta_lw(struct fsm * fsm)565 static iwrc _fsm_write_meta_lw(struct fsm *fsm) {
566   uint64_t llv;
567   size_t wlen;
568   uint32_t sp = 0, lv;
569   uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = { 0 };
570 
571   /*
572       [FSM_CTL_MAGICK u32][block pow u8]
573       [bmoffset u64][bmlength u64]
574       [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
575       [custom header size u32][custom header data...]
576       [fsm data...]
577    */
578 
579   /* magic */
580   lv = IW_HTOIL(IWFSM_MAGICK);
581   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
582   memcpy(hdr + sp, &lv, sizeof(lv));
583   sp += sizeof(lv);
584 
585   /* block pow */
586   static_assert(sizeof(fsm->bpow) == 1, "sizeof(fms->bpow) == 1");
587   assert(sp + sizeof(fsm->bpow) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
588   memcpy(hdr + sp, &fsm->bpow, sizeof(fsm->bpow));
589   sp += sizeof(fsm->bpow);
590 
591   /* fsm bitmap block offset */
592   llv = fsm->bmoff;
593   llv = IW_HTOILL(llv);
594   assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
595   memcpy(hdr + sp, &llv, sizeof(llv));
596   sp += sizeof(llv);
597 
598   /* fsm bitmap block length */
599   llv = fsm->bmlen;
600   llv = IW_HTOILL(llv);
601   assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
602   memcpy(hdr + sp, &llv, sizeof(llv));
603   sp += sizeof(llv);
604 
605   /* Cumulative sum of record sizes acquired by `allocate` */
606   llv = fsm->crzsum;
607   llv = IW_HTOILL(llv);
608   assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
609   memcpy(hdr + sp, &llv, sizeof(llv));
610   sp += sizeof(llv);
611 
612   /* Cumulative number of records acquired by `allocated` */
613   lv = fsm->crznum;
614   lv = IW_HTOIL(lv);
615   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
616   memcpy(hdr + sp, &lv, sizeof(lv));
617   sp += sizeof(lv);
618 
619   /* Record sizes standard variance (deviation^2 * N) */
620   llv = fsm->crzvar;
621   llv = IW_HTOILL(llv);
622   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
623   memcpy(hdr + sp, &llv, sizeof(llv));
624   sp += sizeof(llv);
625 
626   /* Reserved */
627   sp += 32;
628 
629   /* Size of header */
630   lv = fsm->hdrlen;
631   lv = IW_HTOIL(lv);
632   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
633   memcpy(hdr + sp, &lv, sizeof(lv));
634   sp += sizeof(lv);
635 
636   assert(sp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
637   return fsm->pool.write(&fsm->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &wlen);
638 }
639 
/**
 * @brief Search for the first next set bit position
 *        starting from the specified offset bit (INCLUDED).
 *
 * @param addr Bitmap words base pointer.
 * @param offset_bit Search start position, inclusive.
 * @param max_offset_bit Search upper bound, exclusive.
 * @param found [out] Set to `1` when a set bit was found, `0` otherwise.
 * @return Absolute bit position of the found bit; `0` when `*found == 0`.
 */
static uint64_t _fsm_find_next_set_bit(
  const uint64_t   *addr,
  register uint64_t offset_bit,
  const uint64_t    max_offset_bit,
  int              *found
  ) {
  *found = 0;
  register uint64_t bit, size;
  register const uint64_t *p = addr + offset_bit / 64;
  if (offset_bit >= max_offset_bit) {
    return 0;
  }
  // Align the scan to a 64-bit word boundary; `bit` is the in-word offset.
  bit = offset_bit & (64 - 1);
  offset_bit -= bit;
  size = max_offset_bit - offset_bit;

#ifdef IW_BIGENDIAN
  // Words are little-endian on disk: convert each to host order first.
  uint64_t pv = *p;
  if (bit) {
    // Head word: ignore bits below the starting position.
    pv = IW_ITOHLL(pv) & (~((uint64_t) 0) << bit);
    if (pv) {
      pv = iwbits_find_first_sbit64(pv);
      if (pv >= size) {
        return 0;
      } else {
        *found = 1;
        return offset_bit + pv;
      }
    }
    if (size <= 64) {
      return 0;
    }
    offset_bit += 64;
    size -= 64;
    ++p;
  }
  // Scan full 64-bit words.
  while (size & ~(64 - 1)) {
    pv = *(p++);
    if (pv) {
      *found = 1;
      return offset_bit + iwbits_find_first_sbit64(IW_ITOHLL(pv));
    }
    offset_bit += 64;
    size -= 64;
  }
  if (!size) {
    return 0;
  }
  // Tail word: mask off bits beyond the search bound.
  pv = *p;
  pv = IW_ITOHLL(pv) & (~((uint64_t) 0) >> (64 - size));
  if (pv) {
    *found = 1;
    return offset_bit + iwbits_find_first_sbit64(pv);
  } else {
    return 0;
  }
#else
  register uint64_t tmp;
  if (bit) {
    // Head word: ignore bits below the starting position.
    tmp = *p & (~((uint64_t) 0) << bit);
    if (tmp) {
      tmp = iwbits_find_first_sbit64(tmp);
      if (tmp >= size) {
        return 0;
      } else {
        *found = 1;
        return offset_bit + tmp;
      }
    }
    if (size <= 64) {
      return 0;
    }
    offset_bit += 64;
    size -= 64;
    ++p;
  }
  // Scan full 64-bit words.
  while (size & ~(64 - 1)) {
    if ((tmp = *(p++))) {
      *found = 1;
      return offset_bit + iwbits_find_first_sbit64(tmp);
    }
    offset_bit += 64;
    size -= 64;
  }
  if (!size) {
    return 0;
  }
  // Tail word: mask off bits beyond the search bound.
  tmp = (*p) & (~((uint64_t) 0) >> (64 - size));
  if (tmp) {
    *found = 1;
    return offset_bit + iwbits_find_first_sbit64(tmp);
  } else {
    return 0;
  }
#endif
}
740 
/**
 * @brief Search for the first previous set bit position
 *        starting from the specified offset_bit (EXCLUDED).
 *
 * @param addr Bitmap words base pointer.
 * @param offset_bit Search start position, exclusive.
 * @param min_offset_bit Search lower bound, inclusive.
 * @param found [out] Set to `1` when a set bit was found, `0` otherwise.
 * @return Absolute bit position of the found bit; `0` when `*found == 0`.
 */
static uint64_t _fsm_find_prev_set_bit(
  const uint64_t   *addr,
  register uint64_t offset_bit,
  const uint64_t    min_offset_bit,
  int              *found
  ) {
  register const uint64_t *p;
  register uint64_t tmp, bit, size;
  *found = 0;
  if (min_offset_bit >= offset_bit) {
    return 0;
  }
  size = offset_bit - min_offset_bit;
  bit = offset_bit & (64 - 1);
  p = addr + offset_bit / 64;

#ifdef IW_BIGENDIAN
  // Words are little-endian on disk; reverse each word so that the
  // "first set bit" search scans backwards from `offset_bit`.
  uint64_t pv;
  if (bit) {
    // Head word: only the `bit` lowest positions are in range.
    pv = *p;
    pv = (iwbits_reverse_64(IW_ITOHLL(pv)) >> (64 - bit));
    if (pv) {
      pv = iwbits_find_first_sbit64(pv);
      if (pv >= size) {
        return 0;
      } else {
        *found = 1;
        assert(offset_bit > pv);
        return offset_bit > pv ? offset_bit - pv - 1 : 0;
      }
    }
    offset_bit -= bit;
    size -= bit;
  }
  // Scan full 64-bit words backwards.
  while (size & ~(64 - 1)) {
    if (*(--p)) {
      pv = *p;
      *found = 1;
      tmp = iwbits_find_first_sbit64(iwbits_reverse_64(IW_ITOHLL(pv)));
      assert(offset_bit > tmp);
      return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
    }
    offset_bit -= 64;
    size -= 64;
  }
  if (size == 0) {
    return 0;
  }
  // Tail word: mask to the remaining in-range bits.
  pv = *(--p);
  tmp = iwbits_reverse_64(IW_ITOHLL(pv)) & ((((uint64_t) 1) << size) - 1);
#else
  if (bit) {
    // Head word: only the `bit` lowest positions are in range.
    tmp = (iwbits_reverse_64(*p) >> (64 - bit));
    if (tmp) {
      tmp = iwbits_find_first_sbit64(tmp);
      if (tmp >= size) {
        return 0;
      } else {
        *found = 1;
        assert(offset_bit > tmp);
        return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
      }
    }
    offset_bit -= bit;
    size -= bit;
  }
  // Scan full 64-bit words backwards.
  while (size & ~(64 - 1)) {
    if (*(--p)) {
      *found = 1;
      tmp = iwbits_find_first_sbit64(iwbits_reverse_64(*p));
      assert(offset_bit > tmp);
      return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
    }
    offset_bit -= 64;
    size -= 64;
  }
  if (size == 0) {
    return 0;
  }
  // Tail word: mask to the remaining in-range bits.
  tmp = iwbits_reverse_64(*(--p)) & ((((uint64_t) 1) << size) - 1);
#endif
  if (tmp) {
    uint64_t tmp2;
    *found = 1;
    tmp2 = iwbits_find_first_sbit64(tmp);
    assert(offset_bit > tmp2);
    return offset_bit > tmp2 ? offset_bit - tmp2 - 1 : 0;
  } else {
    return 0;
  }
}
836 
/**
 * @brief Return a previously allocated blocks
 *        back into the free-blocks pool.
 *
 * Cleared blocks are merged with adjacent free chunks on both sides
 * before being (re)inserted into the free-space tree.
 *
 * @param fsm `struct fsm`
 * @param offset_blk Starting block number of the specified range.
 * @param length_blk Range size in blocks.
 */
static iwrc _fsm_blk_deallocate_lw(
  struct fsm    *fsm,
  const uint64_t offset_blk,
  const uint64_t length_blk
  ) {
  iwrc rc;
  uint64_t *bmptr;
  uint64_t left, right;
  int hasleft = 0, hasright = 0;
  // key_offset/key_length: coordinates of the merged free chunk being built.
  uint64_t key_offset = offset_blk, key_length = length_blk;
  uint64_t rm_offset = 0, rm_length = 0;
  uint64_t lfbkoff = fsm->lfbkoff;
  uint64_t end_offset_blk = offset_blk + length_blk;
  fsm_bmopts_t bopts = FSM_BM_NONE;


  if (fsm->oflags & IWFSM_STRICT) {
    bopts |= FSM_BM_STRICT;
  }
  // Clear the allocation bits for the range first.
  rc = _fsm_set_bit_status_lw(fsm, offset_blk, length_blk, 0, bopts);
  RCRET(rc);

  rc = _fsm_bmptr(fsm, &bmptr);
  RCRET(rc);

  /* Merge with neighborhoods */
  left = _fsm_find_prev_set_bit(bmptr, offset_blk, 0, &hasleft);
  if (lfbkoff && (lfbkoff == end_offset_blk)) {
    // Range ends exactly at the cached last free chunk: use the cache.
    right = lfbkoff + fsm->lfbklen;
    hasright = 1;
  } else {
    uint64_t maxoff = lfbkoff ? lfbkoff : (fsm->bmlen << 3);
    right = _fsm_find_next_set_bit(bmptr, end_offset_blk, maxoff, &hasright);
  }

  if (hasleft) {
    if (offset_blk > left + 1) {
      // Free gap between the previous set bit and our range: absorb it.
      left += 1;
      rm_offset = left;
      rm_length = offset_blk - left;
      _fsm_del_fbk(fsm, rm_offset, rm_length);
      key_offset = rm_offset;
      key_length += rm_length;
    }
  } else if (offset_blk > 0) { /* zero start */
    // No set bit to the left at all: everything from 0 is free; absorb it.
    rm_offset = 0;
    rm_length = offset_blk;
    _fsm_del_fbk(fsm, rm_offset, rm_length);
    key_offset = rm_offset;
    key_length += rm_length;
  }
  if (hasright && (right > end_offset_blk)) {
    // Free gap between our range and the next set bit: absorb it.
    rm_offset = end_offset_blk;
    rm_length = right - end_offset_blk;
    _fsm_del_fbk(fsm, rm_offset, rm_length);
    key_length += rm_length;
  }
  IWRC(_fsm_put_fbk(fsm, key_offset, key_length), rc);
  return rc;
}
905 
/**
 * @brief Initialize a new free-space bitmap area.
 *
 * If a bitmap already exists, its content is copied into the newly
 * created area, then the blocks of the previous bitmap are disposed
 * and deallocated.
 *
 * @param fsm `struct fsm`
 * @param bmoff Byte offset of the new bitmap. Value must be page aligned.
 * @param bmlen Byte length of the new bitmap. Value must be page aligned.
 *              Its length must not be lesser than the length of the old bitmap.
 */
static iwrc _fsm_init_lw(struct fsm *fsm, uint64_t bmoff, uint64_t bmlen) {
  iwrc rc;
  uint8_t *mm, *mm2;
  size_t sp, sp2;
  uint64_t old_bmoff, old_bmlen;
  IWFS_EXT *pool = &fsm->pool;

  /* Offset and length must be block aligned; offset must be page aligned too. */
  if ((bmlen & ((1U << fsm->bpow) - 1)) || (bmoff & ((1U << fsm->bpow) - 1)) || (bmoff & (fsm->aunit - 1))) {
    return IWFS_ERROR_RANGE_NOT_ALIGNED;
  }
  if (bmlen < fsm->bmlen) {
    rc = IW_ERROR_INVALID_ARGS;
    iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
                      " must not be lesser than the current bitmap area length %" PRIu64 "",
                      bmlen, fsm->bmlen);
    return rc;
  }
  /* Each bitmap byte tracks 8 blocks: the new bitmap must be able to address
     every block up to its own end (header + bitmap itself). */
  if (bmlen * 8 < ((bmoff + bmlen) >> fsm->bpow) + 1) {
    rc = IW_ERROR_INVALID_ARGS;
    iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
                      " is not enough to handle bitmap itself and the file header area.",
                      bmlen);
    return rc;
  }
  rc = _fsm_ensure_size_lw(fsm, bmoff + bmlen);
  RCRET(rc);
  if (fsm->mmap_all) {
    // get mmap area without locking, since we ensured what pool file will not be remapped
    rc = pool->probe_mmap(pool, 0, &mm, &sp);
    RCRET(rc);
    if (sp < bmoff + bmlen) {
      return IWFS_ERROR_NOT_MMAPED;
    } else {
      mm += bmoff;
    }
  } else {
    // get mmap area without locking, since we ensured what pool file will not be remapped
    rc = pool->probe_mmap(pool, bmoff, &mm, &sp);
    RCRET(rc);
    if (sp < bmlen) {
      return IWFS_ERROR_NOT_MMAPED;
    }
  }
  if (fsm->bmlen) {
    /* We have an old active bitmap. Lets copy its content to the new location.*/
    if (IW_RANGES_OVERLAP(fsm->bmoff, fsm->bmoff + fsm->bmlen, bmoff, bmoff + bmlen)) {
      iwlog_ecode_error2(rc, "New and old bitmap areas are overlaped");
      return IW_ERROR_INVALID_ARGS;
    }
    if (fsm->mmap_all) {
      /* Old bitmap lives in the same whole-file mapping. */
      mm2 = mm - bmoff + fsm->bmoff;
    } else {
      rc = pool->probe_mmap(pool, fsm->bmoff, &mm2, &sp2);
      if (!rc && (sp2 < fsm->bmlen)) {
        rc = IWFS_ERROR_NOT_MMAPED;
      }
      if (rc) {
        iwlog_ecode_error2(rc, "Old bitmap area is not mmaped");
        return rc;
      }
    }
    assert(!((fsm->bmlen - bmlen) & ((1U << fsm->bpow) - 1)));
    if (fsm->dlsnr) {
      /* Report the bytes about to be copied in to the data listener. */
      rc = fsm->dlsnr->onwrite(fsm->dlsnr, bmoff, mm2, fsm->bmlen, 0);
      RCRET(rc);
    }
    memcpy(mm, mm2, fsm->bmlen);
    if (bmlen > fsm->bmlen) {
      /* Zero the tail the old bitmap did not cover. */
      memset(mm + fsm->bmlen, 0, bmlen - fsm->bmlen);
      if (fsm->dlsnr) {
        rc = fsm->dlsnr->onset(fsm->dlsnr, bmoff + fsm->bmlen, 0, bmlen - fsm->bmlen, 0);
        RCRET(rc);
      }
    }
  } else {
    /* First-time initialization: start from an all-zero (all-free) bitmap. */
    mm2 = 0;
    memset(mm, 0, bmlen);
    if (fsm->dlsnr) {
      rc = fsm->dlsnr->onset(fsm->dlsnr, bmoff, 0, bmlen, 0);
      RCRET(rc);
    }
  }

  /* Backup the previous bitmap range */
  old_bmlen = fsm->bmlen;
  old_bmoff = fsm->bmoff;
  fsm->bmoff = bmoff;
  fsm->bmlen = bmlen;

  /* Mark the blocks occupied by the new bitmap itself as allocated. */
  RCC(rc, rollback, _fsm_set_bit_status_lw(fsm, (bmoff >> fsm->bpow), (bmlen >> fsm->bpow), 1, FSM_BM_NONE));
  if (!old_bmlen) { /* First time initialization */
    /* Header allocation */
    RCC(rc, rollback, _fsm_set_bit_status_lw(fsm, 0, (fsm->hdrlen >> fsm->bpow), 1, FSM_BM_NONE));
  }

  /* Reload fsm tree */
  _fsm_load_fsm_lw(fsm, mm, bmlen);

  /* Flush new meta */
  RCC(rc, rollback, _fsm_write_meta_lw(fsm));
  RCC(rc, rollback, pool->sync(pool, IWFS_FDATASYNC));

  if (old_bmlen) {
    /* Now we are save to deallocate the old bitmap */
    rc = _fsm_blk_deallocate_lw(fsm, (old_bmoff >> fsm->bpow), (old_bmlen >> fsm->bpow));
    if (!fsm->mmap_all) {
      pool->remove_mmap(pool, old_bmoff);
    }
  }
  return rc;

rollback:
  /* try to rollback previous bitmap state */
  fsm->bmoff = old_bmoff;
  fsm->bmlen = old_bmlen;
  if (old_bmlen && mm2) {
    _fsm_load_fsm_lw(fsm, mm2, old_bmlen);
  }
  pool->sync(pool, IWFS_FDATASYNC);
  return rc;
}
1038 
1039 /**
1040  * @brief Resize bitmap area.
1041  * @param fsm `structfsm
1042  * @param size New size of bitmap area in bytes.
1043  */
_fsm_resize_fsm_bitmap_lw(struct fsm * fsm,uint64_t size)1044 static iwrc _fsm_resize_fsm_bitmap_lw(struct fsm *fsm, uint64_t size) {
1045   iwrc rc;
1046   uint64_t bmoffset = 0, bmlen, sp;
1047   IWFS_EXT *pool = &fsm->pool;
1048 
1049   if (fsm->bmlen >= size) {
1050     return 0;
1051   }
1052   bmlen = IW_ROUNDUP(size, fsm->aunit); /* align to the system page size. */
1053   rc = _fsm_blk_allocate_aligned_lw(
1054     fsm, (bmlen >> fsm->bpow), &bmoffset, &sp, UINT64_MAX,
1055     IWFSM_ALLOC_NO_STATS | IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE);
1056   if (!rc) {
1057     bmoffset = bmoffset << fsm->bpow;
1058     bmlen = sp << fsm->bpow;
1059   } else if (rc == IWFS_ERROR_NO_FREE_SPACE) {
1060     bmoffset = fsm->bmlen * (1 << fsm->bpow) * 8;
1061     bmoffset = IW_ROUNDUP(bmoffset, fsm->aunit);
1062   }
1063   if (!fsm->mmap_all) {
1064     rc = pool->add_mmap(pool, bmoffset, bmlen, fsm->mmap_opts);
1065     RCRET(rc);
1066   }
1067   rc = _fsm_init_lw(fsm, bmoffset, bmlen);
1068   if (rc && !fsm->mmap_all) {
1069     pool->remove_mmap(pool, bmoffset);
1070   }
1071   return rc;
1072 }
1073 
/**
 * @brief Allocate a continuous segment of blocks.
 *
 * @param fsm `struct fsm`
 * @param length_blk Desired segment length in blocks.
 * @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
 *                It also specifies the desired segment offset to provide allocation locality.
 * @param [out] olength_blk Assigned segment length in blocks.
 * @param opts Allocation options (`IWFSM_ALLOC_*` flags).
 */
static iwrc _fsm_blk_allocate_lw(
  struct fsm     *fsm,
  uint64_t        length_blk,
  uint64_t       *offset_blk,
  uint64_t       *olength_blk,
  iwfs_fsm_aflags opts
  ) {
  iwrc rc;
  struct iwavl_node *nn;
  fsm_bmopts_t bopts = FSM_BM_NONE;

  if (opts & IWFSM_ALLOC_PAGE_ALIGNED) {
    /* Delegate to the aligned allocator, doubling the bitmap on demand
       unless extension is forbidden. */
    while (1) {
      rc = _fsm_blk_allocate_aligned_lw(fsm, length_blk, offset_blk, olength_blk, UINT64_MAX, opts);
      if (rc == IWFS_ERROR_NO_FREE_SPACE) {
        if (opts & IWFSM_ALLOC_NO_EXTEND) {
          return IWFS_ERROR_NO_FREE_SPACE;
        }
        rc = _fsm_resize_fsm_bitmap_lw(fsm, fsm->bmlen << 1);
        RCRET(rc);
        continue;
      }
      if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
        /* Ensure the allocated range is backed by real file space. */
        uint64_t bs = *offset_blk;
        int64_t bl = *olength_blk;
        rc = _fsm_ensure_size_lw(fsm, (bs << fsm->bpow) + (bl << fsm->bpow));
      }
      return rc;
    }
  }

  *olength_blk = length_blk;

start:
  nn = (struct iwavl_node*) _fsm_find_matching_fblock_lw(fsm, *offset_blk, length_blk, opts);
  if (nn) { /* use existing free space block */
    const struct bkey *nk = &BKEY(nn);
    uint64_t nlength = FSMBK_LENGTH(nk);
    *offset_blk = FSMBK_OFFSET(nk);

    _fsm_del_fbk2(fsm, nn);

    if (nlength > length_blk) { /* re-save rest of free-space */
      if (!(opts & IWFSM_ALLOC_NO_OVERALLOCATE) && fsm->crznum) {
        /* todo use lognormal distribution? */
        /* Heuristic: if the leftover is small compared to the typical
           allocation size, keep it attached to this record instead of
           creating a tiny free fragment. */
        double_t d = ((double_t) fsm->crzsum / (double_t) fsm->crznum)        /*avg*/
                     - (double) (nlength - length_blk);                       /*rest blk size*/
        double_t s = ((double_t) fsm->crzvar / (double_t) fsm->crznum) * 6.0; /* blk size dispersion * 6 */
        if ((s > 1) && (d > 0) && (d * d > s)) {
          /* its better to attach rest of block to
             the record */
          *olength_blk = nlength;
        } else {
          _fsm_put_fbk(fsm, (*offset_blk + length_blk), (nlength - length_blk));
        }
      } else {
        _fsm_put_fbk(fsm, (*offset_blk + length_blk), (nlength - length_blk));
      }
    }
  } else {
    /* No suitable free block found: grow the bitmap (and file) then retry. */
    if (opts & IWFSM_ALLOC_NO_EXTEND) {
      return IWFS_ERROR_NO_FREE_SPACE;
    }
    rc = _fsm_resize_fsm_bitmap_lw(fsm, fsm->bmlen << 1);
    RCRET(rc);
    goto start;
  }

  if (fsm->oflags & IWFSM_STRICT) {
    bopts |= FSM_BM_STRICT;
  }
  rc = _fsm_set_bit_status_lw(fsm, *offset_blk, *olength_blk, 1, bopts);
  if (!rc && !(opts & IWFSM_ALLOC_NO_STATS)) {
    double_t avg;
    /* Update allocation statistics */
    if (fsm->crznum > FSM_MAX_STATS_COUNT) {
      /* Reset stats periodically so they track recent behavior. */
      fsm->crznum = 0;
      fsm->crzsum = 0;
      fsm->crzvar = 0;
    }
    ++fsm->crznum;
    fsm->crzsum += length_blk;
    avg = (double_t) fsm->crzsum / (double_t) fsm->crznum; /* average */
    fsm->crzvar
      += (uint64_t) (((double_t) length_blk - avg) * ((double_t) length_blk - avg) + 0.5L); /* variance */
  }
  if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
    uint64_t bs = *offset_blk;
    int64_t bl = *olength_blk;
    rc = _fsm_ensure_size_lw(fsm, (bs << fsm->bpow) + (bl << fsm->bpow));
  }
  if (!rc && (opts & IWFSM_SYNC_BMAP)) {
    /* Optionally flush the bitmap pages to disk right away. */
    uint64_t *bmptr;
    if (!_fsm_bmptr(fsm, &bmptr)) {
      IWFS_EXT *pool = &fsm->pool;
      rc = pool->sync_mmap(pool, fsm->bmoff, IWFS_SYNCDEFAULT);
    }
  }
  return rc;
}
1184 
/**
 * @brief Remove all free blocks from the end of file and trim its size.
 *
 * Additionally tries to relocate the free-space bitmap to a lower file
 * offset so that more tail space becomes trimmable.
 */
static iwrc _fsm_trim_tail_lw(struct fsm *fsm) {
  iwrc rc;
  int hasleft;
  uint64_t length, lastblk, *bmptr;
  IWFS_EXT_STATE fstate;
  uint64_t offset = 0;

  if (!(fsm->omode & IWFS_OWRITE)) {
    return 0;
  }
  /* find free space for fsm with lesser offset than actual */
  rc = _fsm_blk_allocate_aligned_lw(
    fsm, (fsm->bmlen >> fsm->bpow), &offset, &length, (fsm->bmoff >> fsm->bpow),
    IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE | IWFSM_ALLOC_NO_STATS);

  if (rc && (rc != IWFS_ERROR_NO_FREE_SPACE)) {
    return rc;
  }
  if (rc) {
    /* No better spot for the bitmap: keep it where it is. */
    rc = 0;
  } else if ((offset << fsm->bpow) < fsm->bmoff) {
    /* Relocate the bitmap into the lower-offset segment just allocated. */
    offset = offset << fsm->bpow;
    length = length << fsm->bpow;
    assert(offset != fsm->bmoff);
    fsm->pool.add_mmap(&fsm->pool, offset, length, fsm->mmap_opts);
    RCC(rc, finish, _fsm_init_lw(fsm, offset, length));
  } else {
    /* should never be reached: the allocator was capped at the current
       bitmap offset */
    assert(0);
    RCC(rc, finish, _fsm_blk_deallocate_lw(fsm, offset, length));
  }

  RCC(rc, finish, _fsm_bmptr(fsm, &bmptr)); // -V519

  /* Find the last allocated block and truncate the file right after it. */
  lastblk = (fsm->bmoff + fsm->bmlen) >> fsm->bpow;
  offset = _fsm_find_prev_set_bit(bmptr, (fsm->bmlen << 3), lastblk, &hasleft);
  if (hasleft) {
    lastblk = offset + 1;
  }
  rc = fsm->pool.state(&fsm->pool, &fstate);
  if (!rc && (fstate.fsize > (lastblk << fsm->bpow))) {
    rc = fsm->pool.truncate(&fsm->pool, lastblk << fsm->bpow);
  }

finish:
  return rc;
}
1235 
_fsm_init_impl(struct fsm * fsm,const IWFS_FSM_OPTS * opts)1236 static iwrc _fsm_init_impl(struct fsm *fsm, const IWFS_FSM_OPTS *opts) {
1237   fsm->oflags = opts->oflags;
1238   fsm->aunit = iwp_alloc_unit();
1239   fsm->bpow = opts->bpow;
1240   fsm->mmap_all = opts->mmap_all;
1241   if (!fsm->bpow) {
1242     fsm->bpow = 6;  // 64bit block
1243   } else if (fsm->bpow > FSM_MAX_BLOCK_POW) {
1244     return IWFS_ERROR_INVALID_BLOCK_SIZE;
1245   } else if ((1U << fsm->bpow) > fsm->aunit) {
1246     return IWFS_ERROR_PLATFORM_PAGE;
1247   }
1248   return 0;
1249 }
1250 
_fsm_init_locks(struct fsm * fsm,const IWFS_FSM_OPTS * opts)1251 static iwrc _fsm_init_locks(struct fsm *fsm, const IWFS_FSM_OPTS *opts) {
1252   if (opts->oflags & IWFSM_NOLOCKS) {
1253     fsm->ctlrwlk = 0;
1254     return 0;
1255   }
1256   fsm->ctlrwlk = calloc(1, sizeof(*fsm->ctlrwlk));
1257   if (!fsm->ctlrwlk) {
1258     return iwrc_set_errno(IW_ERROR_ALLOC, errno);
1259   }
1260   int rci = pthread_rwlock_init(fsm->ctlrwlk, 0);
1261   if (rci) {
1262     free(fsm->ctlrwlk);
1263     fsm->ctlrwlk = 0;
1264     return iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci);
1265   }
1266   return 0;
1267 }
1268 
_fsm_destroy_locks(struct fsm * fsm)1269 static iwrc _fsm_destroy_locks(struct fsm *fsm) {
1270   if (!fsm->ctlrwlk) {
1271     return 0;
1272   }
1273   iwrc rc = 0;
1274   int rci = pthread_rwlock_destroy(fsm->ctlrwlk);
1275   if (rci) {
1276     IWRC(iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci), rc);
1277   }
1278   free(fsm->ctlrwlk);
1279   fsm->ctlrwlk = 0;
1280   return rc;
1281 }
1282 
/**
 * @brief Read and decode the fixed file header into the `fsm` state.
 *
 * NOTE(review): some validation failures below (platform page size,
 * bitmap alignment) log the error but let parsing continue; the error
 * code is still returned to the caller at the end — confirm intended.
 */
static iwrc _fsm_read_meta_lr(struct fsm *fsm) {
  iwrc rc;
  uint32_t lv;
  uint64_t llv;
  size_t sp, rp = 0;
  uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = { 0 };

  /*
      [FSM_CTL_MAGICK u32][block pow u8]
      [bmoffset u64][bmlength u64]
      [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
      [custom header size u32][custom header data...]
      [fsm data...]
   */

  rc = fsm->pool.read(&fsm->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &sp);
  if (rc) {
    iwlog_ecode_error3(rc);
    return rc;
  }

  /* Magic */
  memcpy(&lv, hdr + rp, sizeof(lv)); // -V512
  lv = IW_ITOHL(lv);
  if (lv != IWFSM_MAGICK) {
    rc = IWFS_ERROR_INVALID_FILEMETA;
    iwlog_ecode_error2(rc, "Invalid file magic number");
    return rc;
  }
  rp += sizeof(lv);

  /* Block pow */
  memcpy(&fsm->bpow, hdr + rp, sizeof(fsm->bpow));
  rp += sizeof(fsm->bpow);

  if (fsm->bpow > FSM_MAX_BLOCK_POW) {
    rc = IWFS_ERROR_INVALID_FILEMETA;
    iwlog_ecode_error(rc, "Invalid file blocks pow: %u", fsm->bpow);
    return rc;
  }
  if ((1U << fsm->bpow) > fsm->aunit) {
    rc = IWFS_ERROR_PLATFORM_PAGE;
    iwlog_ecode_error(rc, "Block size: %u must not be greater than system page size: %zu",
                      (1U << fsm->bpow), fsm->aunit);
  }

  /* Free-space bitmap offset */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->bmoff = llv;
  rp += sizeof(llv);

  /* Free-space bitmap length */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->bmlen = llv;
  if (llv & (64 - 1)) {
    /* Bitmap is processed in 64-bit words, so it must be 64-bit aligned. */
    rc = IWFS_ERROR_INVALID_FILEMETA;
    iwlog_ecode_error(rc, "Free-space bitmap length is not 64bit aligned: %" PRIuMAX "", fsm->bmlen);
  }
  rp += sizeof(llv);

  /* Cumulative sum of record sizes acquired by `allocate` */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->crzsum = llv;
  rp += sizeof(llv);

  /* Cumulative number of records acquired by `allocated` */
  memcpy(&lv, hdr + rp, sizeof(lv));
  lv = IW_ITOHL(lv);
  fsm->crznum = lv;
  rp += sizeof(lv);

  /* Record sizes standard variance (deviation^2 * N) */
  memcpy(&llv, hdr + rp, sizeof(llv));
  llv = IW_ITOHLL(llv);
  fsm->crzvar = llv;
  rp += sizeof(llv);

  /* Reserved */
  rp += 32;

  /* Header size */
  memcpy(&lv, hdr + rp, sizeof(lv));
  lv = IW_ITOHL(lv);
  fsm->hdrlen = lv;
  rp += sizeof(lv);

  assert(rp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
  return rc;
}
1375 
_fsm_init_new_lw(struct fsm * fsm,const IWFS_FSM_OPTS * opts)1376 static iwrc _fsm_init_new_lw(struct fsm *fsm, const IWFS_FSM_OPTS *opts) {
1377   FSM_ENSURE_OPEN(fsm);
1378   iwrc rc;
1379   uint64_t bmlen, bmoff;
1380   IWFS_EXT *pool = &fsm->pool;
1381   assert(fsm->aunit && fsm->bpow);
1382 
1383   fsm->hdrlen = opts->hdrlen + IWFSM_CUSTOM_HDR_DATA_OFFSET;
1384   fsm->hdrlen = IW_ROUNDUP(fsm->hdrlen, 1ULL << fsm->bpow);
1385   bmlen = opts->bmlen > 0 ? IW_ROUNDUP(opts->bmlen, fsm->aunit) : fsm->aunit;
1386   bmoff = IW_ROUNDUP(fsm->hdrlen, fsm->aunit);
1387 
1388   if (fsm->mmap_all) {
1389     /* mmap whole file */
1390     rc = pool->add_mmap(pool, 0, SIZE_T_MAX, fsm->mmap_opts);
1391     RCRET(rc);
1392   } else {
1393     /* mmap header */
1394     rc = pool->add_mmap(pool, 0, fsm->hdrlen, fsm->mmap_opts);
1395     RCRET(rc);
1396     /* mmap the fsm bitmap index */
1397     rc = pool->add_mmap(pool, bmoff, bmlen, fsm->mmap_opts);
1398     RCRET(rc);
1399   }
1400   return _fsm_init_lw(fsm, bmoff, bmlen);
1401 }
1402 
_fsm_init_existing_lw(struct fsm * fsm)1403 static iwrc _fsm_init_existing_lw(struct fsm *fsm) {
1404   FSM_ENSURE_OPEN(fsm);
1405   iwrc rc;
1406   size_t sp;
1407   uint8_t *mm;
1408   IWFS_EXT *pool = &fsm->pool;
1409 
1410   RCC(rc, finish, _fsm_read_meta_lr(fsm));
1411 
1412   if (fsm->mmap_all) {
1413     /* mmap the whole file */
1414     RCC(rc, finish, pool->add_mmap(pool, 0, SIZE_T_MAX, fsm->mmap_opts));
1415     RCC(rc, finish, pool->probe_mmap(pool, 0, &mm, &sp));
1416     if (sp < fsm->bmoff + fsm->bmlen) {
1417       rc = IWFS_ERROR_NOT_MMAPED;
1418       goto finish;
1419     } else {
1420       mm += fsm->bmoff;
1421     }
1422   } else {
1423     /* mmap the header of file */
1424     RCC(rc, finish, pool->add_mmap(pool, 0, fsm->hdrlen, fsm->mmap_opts));
1425     /* mmap the fsm bitmap index */
1426     RCC(rc, finish, pool->add_mmap(pool, fsm->bmoff, fsm->bmlen, fsm->mmap_opts));
1427     RCC(rc, finish, pool->probe_mmap(pool, fsm->bmoff, &mm, &sp));
1428     if (sp < fsm->bmlen) {
1429       rc = IWFS_ERROR_NOT_MMAPED;
1430       goto finish;
1431     }
1432   }
1433 
1434   _fsm_load_fsm_lw(fsm, mm, fsm->bmlen);
1435 
1436 finish:
1437   return rc;
1438 }
1439 
1440 /**
1441  * @brief Check if all blocks within the specified range have been `allocated`.
1442  *
1443  * @param fsm `struct fsm`
1444  * @param offset_blk Starting block number of the specified range.
1445  * @param length_blk Range size in blocks.
1446  * @param [out] ret Checking result.
1447  */
_fsm_is_fully_allocated_lr(struct fsm * fsm,uint64_t offset_blk,uint64_t length_blk,int * ret)1448 static iwrc _fsm_is_fully_allocated_lr(struct fsm *fsm, uint64_t offset_blk, uint64_t length_blk, int *ret) {
1449   uint64_t end = offset_blk + length_blk;
1450   *ret = 1;
1451   if ((length_blk < 1) || (end < offset_blk) || (end > (fsm->bmlen << 3))) {
1452     *ret = 0;
1453     return 0;
1454   }
1455   iwrc rc = _fsm_set_bit_status_lw(fsm, offset_blk, length_blk, 0, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1456   if (rc == IWFS_ERROR_FSM_SEGMENTATION) {
1457     *ret = 0;
1458     return 0;
1459   }
1460   return rc;
1461 }
1462 
/*************************************************************************************************
 *                                          Public API                                           *
 *************************************************************************************************/
1466 
_fsm_write(struct IWFS_FSM * f,off_t off,const void * buf,size_t siz,size_t * sp)1467 static iwrc _fsm_write(struct IWFS_FSM *f, off_t off, const void *buf, size_t siz, size_t *sp) {
1468   FSM_ENSURE_OPEN2(f);
1469   struct fsm *fsm = f->impl;
1470   iwrc rc = _fsm_ctrl_rlock(fsm);
1471   RCRET(rc);
1472   if (fsm->oflags & IWFSM_STRICT) {
1473     int allocated = 0;
1474     IWRC(_fsm_is_fully_allocated_lr(fsm,
1475                                     (uint64_t) off >> fsm->bpow,
1476                                     IW_ROUNDUP(siz, 1ULL << fsm->bpow) >> fsm->bpow,
1477                                     &allocated), rc);
1478     if (!rc) {
1479       if (!allocated) {
1480         rc = IWFS_ERROR_FSM_SEGMENTATION;
1481       } else {
1482         rc = fsm->pool.write(&fsm->pool, off, buf, siz, sp);
1483       }
1484     }
1485   } else {
1486     rc = fsm->pool.write(&fsm->pool, off, buf, siz, sp);
1487   }
1488   _fsm_ctrl_unlock(fsm);
1489   return rc;
1490 }
1491 
_fsm_read(struct IWFS_FSM * f,off_t off,void * buf,size_t siz,size_t * sp)1492 static iwrc _fsm_read(struct IWFS_FSM *f, off_t off, void *buf, size_t siz, size_t *sp) {
1493   FSM_ENSURE_OPEN2(f);
1494   struct fsm *fsm = f->impl;
1495   iwrc rc = _fsm_ctrl_rlock(fsm);
1496   RCRET(rc);
1497   if (fsm->oflags & IWFSM_STRICT) {
1498     int allocated = 0;
1499     IWRC(_fsm_is_fully_allocated_lr(fsm, (uint64_t) off >> fsm->bpow,
1500                                     IW_ROUNDUP(siz, 1ULL << fsm->bpow) >> fsm->bpow,
1501                                     &allocated), rc);
1502     if (!rc) {
1503       if (!allocated) {
1504         rc = IWFS_ERROR_FSM_SEGMENTATION;
1505       } else {
1506         rc = fsm->pool.read(&fsm->pool, off, buf, siz, sp);
1507       }
1508     }
1509   } else {
1510     rc = fsm->pool.read(&fsm->pool, off, buf, siz, sp);
1511   }
1512   _fsm_ctrl_unlock(fsm);
1513   return rc;
1514 }
1515 
_fsm_sync(struct IWFS_FSM * f,iwfs_sync_flags flags)1516 static iwrc _fsm_sync(struct IWFS_FSM *f, iwfs_sync_flags flags) {
1517   FSM_ENSURE_OPEN2(f);
1518   iwrc rc = _fsm_ctrl_rlock(f->impl);
1519   RCRET(rc);
1520   IWRC(_fsm_write_meta_lw(f->impl), rc);
1521   IWRC(f->impl->pool.sync(&f->impl->pool, flags), rc);
1522   IWRC(_fsm_ctrl_unlock(f->impl), rc);
1523   return rc;
1524 }
1525 
/* Close the fsm file handle.
 *
 * For writable files: optionally trims trailing free space
 * (suppressed by IWFSM_NO_TRIM_ON_CLOSE), persists metadata, and syncs
 * the pool unless a data listener is attached. Then closes the pool,
 * destroys the in-memory free-blocks tree and releases all locks and
 * memory. Safe to call on a NULL or already-closed handle. */
static iwrc _fsm_close(struct IWFS_FSM *f) {
  if (!f || !f->impl) {
    return 0; /* already closed */
  }
  iwrc rc = 0;
  struct fsm *fsm = f->impl;
  IWRC(_fsm_ctrl_wlock(fsm), rc);
  if (fsm->root && (fsm->omode & IWFS_OWRITE)) {
    if (!(fsm->oflags & IWFSM_NO_TRIM_ON_CLOSE)) {
      IWRC(_fsm_trim_tail_lw(fsm), rc);
    }
    IWRC(_fsm_write_meta_lw(fsm), rc);
    if (!fsm->dlsnr) {
      /* No data listener: sync the pool directly. */
      IWRC(fsm->pool.sync(&fsm->pool, IWFS_SYNCDEFAULT), rc);
    }
  }
  IWRC(fsm->pool.close(&fsm->pool), rc);
  _fsm_node_destroy(fsm->root);
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  IWRC(_fsm_destroy_locks(fsm), rc);
  f->impl = 0;
  free(fsm);
  return rc;
}
1550 
/* Grow the underlying pool file so it is at least `size` bytes long.
 * NOTE(review): the `_lw` suffix suggests the control lock must be held
 * by the caller — verify against the call sites. */
IW_INLINE iwrc _fsm_ensure_size_lw(struct fsm *fsm, off_t size) {
  return fsm->pool.ensure_size(&fsm->pool, size);
}
1554 
_fsm_ensure_size(struct IWFS_FSM * f,off_t size)1555 static iwrc _fsm_ensure_size(struct IWFS_FSM *f, off_t size) {
1556   FSM_ENSURE_OPEN2(f);
1557   iwrc rc = _fsm_ctrl_rlock(f->impl);
1558   RCRET(rc);
1559   if (f->impl->bmoff + f->impl->bmlen > size) {
1560     rc = IWFS_ERROR_RESIZE_FAIL;
1561     goto finish;
1562   }
1563   rc = _fsm_ensure_size_lw(f->impl, size);
1564 
1565 finish:
1566   IWRC(_fsm_ctrl_unlock(f->impl), rc);
1567   return rc;
1568 }
1569 
_fsm_add_mmap(struct IWFS_FSM * f,off_t off,size_t maxlen,iwfs_ext_mmap_opts_t opts)1570 static iwrc _fsm_add_mmap(struct IWFS_FSM *f, off_t off, size_t maxlen, iwfs_ext_mmap_opts_t opts) {
1571   FSM_ENSURE_OPEN2(f);
1572   return f->impl->pool.add_mmap(&f->impl->pool, off, maxlen, opts);
1573 }
1574 
_fsm_remap_all(struct IWFS_FSM * f)1575 static iwrc _fsm_remap_all(struct IWFS_FSM *f) {
1576   FSM_ENSURE_OPEN2(f);
1577   return f->impl->pool.remap_all(&f->impl->pool);
1578 }
1579 
_fsm_acquire_mmap(struct IWFS_FSM * f,off_t off,uint8_t ** mm,size_t * sp)1580 iwrc _fsm_acquire_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1581   return f->impl->pool.acquire_mmap(&f->impl->pool, off, mm, sp);
1582 }
1583 
_fsm_release_mmap(struct IWFS_FSM * f)1584 iwrc _fsm_release_mmap(struct IWFS_FSM *f) {
1585   return f->impl->pool.release_mmap(&f->impl->pool);
1586 }
1587 
_fsm_probe_mmap(struct IWFS_FSM * f,off_t off,uint8_t ** mm,size_t * sp)1588 static iwrc _fsm_probe_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1589   FSM_ENSURE_OPEN2(f);
1590   return f->impl->pool.probe_mmap(&f->impl->pool, off, mm, sp);
1591 }
1592 
_fsm_remove_mmap(struct IWFS_FSM * f,off_t off)1593 static iwrc _fsm_remove_mmap(struct IWFS_FSM *f, off_t off) {
1594   FSM_ENSURE_OPEN2(f);
1595   return f->impl->pool.remove_mmap(&f->impl->pool, off);
1596 }
1597 
_fsm_sync_mmap(struct IWFS_FSM * f,off_t off,iwfs_sync_flags flags)1598 static iwrc _fsm_sync_mmap(struct IWFS_FSM *f, off_t off, iwfs_sync_flags flags) {
1599   FSM_ENSURE_OPEN2(f);
1600   return f->impl->pool.sync_mmap(&f->impl->pool, off, flags);
1601 }
1602 
_fsm_allocate(struct IWFS_FSM * f,off_t len,off_t * oaddr,off_t * olen,iwfs_fsm_aflags opts)1603 static iwrc _fsm_allocate(struct IWFS_FSM *f, off_t len, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
1604   FSM_ENSURE_OPEN2(f);
1605   iwrc rc;
1606   uint64_t sbnum, nlen;
1607   struct fsm *fsm = f->impl;
1608 
1609   *olen = 0;
1610   if (!(fsm->omode & IWFS_OWRITE)) {
1611     return IW_ERROR_READONLY;
1612   }
1613   if (len <= 0) {
1614     return IW_ERROR_INVALID_ARGS;
1615   }
1616   /* Required blocks number */
1617   sbnum = (uint64_t) *oaddr >> fsm->bpow;
1618   len = IW_ROUNDUP(len, 1ULL << fsm->bpow);
1619 
1620   rc = _fsm_ctrl_wlock(fsm);
1621   RCRET(rc);
1622   rc = _fsm_blk_allocate_lw(f->impl, (uint64_t) len >> fsm->bpow, &sbnum, &nlen, opts);
1623   if (!rc) {
1624     *olen = (nlen << fsm->bpow);
1625     *oaddr = (sbnum << fsm->bpow);
1626   }
1627   IWRC(_fsm_ctrl_unlock(fsm), rc);
1628   return rc;
1629 }
1630 
/* Reallocate the aligned range [*oaddr, *oaddr + *olen) to hold `nlen` bytes.
 *
 * Shrinking frees the tail blocks in place; growing allocates a (possibly
 * relocated) segment, copies the old content when the address changed, then
 * releases the old range. On success *oaddr/*olen are updated with the new
 * byte offset and length. */
static iwrc _fsm_reallocate(struct IWFS_FSM *f, off_t nlen, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  struct fsm *fsm = f->impl;

  if (!(fsm->omode & IWFS_OWRITE)) {
    return IW_ERROR_READONLY;
  }
  /* The current address and length must both be block aligned. */
  if ((*oaddr & ((1ULL << fsm->bpow) - 1)) || (*olen & ((1ULL << fsm->bpow) - 1))) {
    return IWFS_ERROR_RANGE_NOT_ALIGNED;
  }
  uint64_t sp;
  uint64_t nlen_blk = IW_ROUNDUP((uint64_t) nlen, 1ULL << fsm->bpow) >> fsm->bpow;
  uint64_t olen_blk = (uint64_t) *olen >> fsm->bpow;
  uint64_t oaddr_blk = (uint64_t) *oaddr >> fsm->bpow;
  uint64_t naddr_blk = oaddr_blk; /* allocation hint: prefer staying in place */

  if (nlen_blk == olen_blk) {
    return 0;
  }
  rc = _fsm_ctrl_wlock(fsm);
  RCRET(rc);
  if (nlen_blk < olen_blk) {
    /* Shrink: free the tail of the current range. */
    rc = _fsm_blk_deallocate_lw(fsm, oaddr_blk + nlen_blk, olen_blk - nlen_blk);
    if (!rc) {
      *oaddr = oaddr_blk << fsm->bpow;
      *olen = nlen_blk << fsm->bpow;
    }
  } else {
    /* Grow: allocate new space, copy the payload if relocated, then free
       the old range. */
    RCC(rc, finish, _fsm_blk_allocate_lw(fsm, nlen_blk, &naddr_blk, &sp, opts));
    if (naddr_blk != oaddr_blk) {
      RCC(rc, finish, fsm->pool.copy(&fsm->pool, *oaddr, (size_t) *olen, naddr_blk << fsm->bpow));
    }
    RCC(rc, finish, _fsm_blk_deallocate_lw(fsm, oaddr_blk, olen_blk));
    *oaddr = naddr_blk << fsm->bpow;
    *olen = sp << fsm->bpow;
  }

finish:
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1673 
/* Deallocate the block range covering `len` bytes at byte offset `addr`.
 * Ranges overlapping the file header or the free-space bitmap are refused.
 * NOTE(review): `len` is truncated (not rounded up) to whole blocks and its
 * alignment is not validated, unlike `addr` — confirm this is intended. */
static iwrc _fsm_deallocate(struct IWFS_FSM *f, off_t addr, off_t len) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  struct fsm *fsm = f->impl;
  off_t offset_blk = (uint64_t) addr >> fsm->bpow;
  off_t length_blk = (uint64_t) len >> fsm->bpow;

  if (!(fsm->omode & IWFS_OWRITE)) {
    return IW_ERROR_READONLY;
  }
  if (addr & ((1ULL << fsm->bpow) - 1)) {
    return IWFS_ERROR_RANGE_NOT_ALIGNED;
  }
  rc = _fsm_ctrl_wlock(fsm);
  RCRET(rc);
  if (  IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (fsm->hdrlen >> fsm->bpow))
     || IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (fsm->bmoff >> fsm->bpow),
                          (fsm->bmoff >> fsm->bpow) + (fsm->bmlen >> fsm->bpow))) {
    // Deny deallocations in header or free-space bitmap itself
    IWRC(_fsm_ctrl_unlock(fsm), rc);
    return IWFS_ERROR_FSM_SEGMENTATION;
  }
  rc = _fsm_blk_deallocate_lw(fsm, (uint64_t) offset_blk, (uint64_t) length_blk);
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1700 
_fsm_check_allocation_status(struct IWFS_FSM * f,off_t addr,off_t len,bool allocated)1701 static iwrc _fsm_check_allocation_status(struct IWFS_FSM *f, off_t addr, off_t len, bool allocated) {
1702   struct fsm *fsm = f->impl;
1703   if ((addr & ((1ULL << fsm->bpow) - 1)) || (len & ((1ULL << fsm->bpow) - 1))) {
1704     return IWFS_ERROR_RANGE_NOT_ALIGNED;
1705   }
1706   iwrc rc = _fsm_ctrl_rlock(fsm);
1707   RCRET(rc);
1708   off_t offset_blk = (uint64_t) addr >> fsm->bpow;
1709   off_t length_blk = (uint64_t) len >> fsm->bpow;
1710   if (  IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (fsm->hdrlen >> fsm->bpow))
1711      || IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (fsm->bmoff >> fsm->bpow),
1712                           (fsm->bmoff >> fsm->bpow) + (fsm->bmlen >> fsm->bpow))) {
1713     IWRC(_fsm_ctrl_unlock(fsm), rc);
1714     return IWFS_ERROR_FSM_SEGMENTATION;
1715   }
1716   rc = _fsm_set_bit_status_lw(fsm, (uint64_t) offset_blk, (uint64_t) length_blk,
1717                               allocated ? 0 : 1, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1718   IWRC(_fsm_ctrl_unlock(fsm), rc);
1719   return rc;
1720 }
1721 
/* Write `siz` bytes of custom header data at custom-header offset `off`
 * (i.e. at file offset IWFSM_CUSTOM_HDR_DATA_OFFSET + off), bounds-checked
 * against the reserved header area.
 * NOTE(review): if the data listener's onwrite() fails, the mmap is still
 * updated and the listener error is returned — confirm this is intended. */
static iwrc _fsm_writehdr(struct IWFS_FSM *f, off_t off, const void *buf, off_t siz) {
  FSM_ENSURE_OPEN2(f);
  iwrc rc;
  uint8_t *mm;
  if (siz < 1) {
    return 0; /* nothing to write */
  }
  struct fsm *fsm = f->impl;
  if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > fsm->hdrlen) {
    return IW_ERROR_OUT_OF_BOUNDS;
  }
  rc = fsm->pool.acquire_mmap(&fsm->pool, 0, &mm, 0);
  if (!rc) {
    if (fsm->dlsnr) {
      /* Report the header mutation to the data listener (WAL etc.). */
      rc = fsm->dlsnr->onwrite(fsm->dlsnr, IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, siz, 0);
    }
    memmove(mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, (size_t) siz);
    IWRC(fsm->pool.release_mmap(&fsm->pool), rc);
  }
  return rc;
}
1743 
_fsm_readhdr(struct IWFS_FSM * f,off_t off,void * buf,off_t siz)1744 static iwrc _fsm_readhdr(struct IWFS_FSM *f, off_t off, void *buf, off_t siz) {
1745   FSM_ENSURE_OPEN2(f);
1746   iwrc rc;
1747   uint8_t *mm;
1748   if (siz < 1) {
1749     return 0;
1750   }
1751   struct fsm *fsm = f->impl;
1752   if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > fsm->hdrlen) {
1753     return IW_ERROR_OUT_OF_BOUNDS;
1754   }
1755   rc = fsm->pool.acquire_mmap(&fsm->pool, 0, &mm, 0);
1756   if (!rc) {
1757     memmove(buf, mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, (size_t) siz);
1758     rc = fsm->pool.release_mmap(&fsm->pool);
1759   }
1760   return rc;
1761 }
1762 
/* Reset the free-space bitmap to a pristine state: drop the current bitmap
 * mapping, re-create a bitmap of the same length right after the header,
 * and optionally trim the file tail (IWFSM_CLEAR_TRIM).
 * NOTE(review): the result of _fsm_ctrl_wlock() is folded into `rc` but the
 * body proceeds regardless — confirm lock failures are acceptable here. */
static iwrc _fsm_clear(struct IWFS_FSM *f, iwfs_fsm_clrfalgs clrflags) {
  FSM_ENSURE_OPEN2(f);
  struct fsm *fsm = f->impl;
  uint64_t bmoff, bmlen;
  iwrc rc = _fsm_ctrl_wlock(fsm);
  bmlen = fsm->bmlen;
  if (!bmlen) {
    goto finish; /* no bitmap yet: nothing to clear */
  }
  if (!fsm->mmap_all && fsm->bmoff) {
    IWRC(fsm->pool.remove_mmap(&fsm->pool, fsm->bmoff), rc);
  }
  /* New bitmap goes right after the (page-aligned) header. */
  bmoff = IW_ROUNDUP(fsm->hdrlen, fsm->aunit);
  if (!fsm->mmap_all) {
    IWRC(fsm->pool.add_mmap(&fsm->pool, bmoff, bmlen, fsm->mmap_opts), rc);
  }
  RCGO(rc, finish);
  /* Reset state so _fsm_init_lw() performs a first-time initialization. */
  fsm->bmlen = 0;
  fsm->bmoff = 0;
  rc = _fsm_init_lw(fsm, bmoff, bmlen);
  if (!rc && (clrflags & IWFSM_CLEAR_TRIM)) {
    rc = _fsm_trim_tail_lw(fsm);
  }

finish:
  IWRC(_fsm_ctrl_unlock(fsm), rc);
  return rc;
}
1791 
_fsm_extfile(struct IWFS_FSM * f,IWFS_EXT ** ext)1792 static iwrc _fsm_extfile(struct IWFS_FSM *f, IWFS_EXT **ext) {
1793   FSM_ENSURE_OPEN2(f);
1794   *ext = &f->impl->pool;
1795   return 0;
1796 }
1797 
_fsm_state(struct IWFS_FSM * f,IWFS_FSM_STATE * state)1798 static iwrc _fsm_state(struct IWFS_FSM *f, IWFS_FSM_STATE *state) {
1799   FSM_ENSURE_OPEN2(f);
1800   struct fsm *fsm = f->impl;
1801   iwrc rc = _fsm_ctrl_rlock(fsm);
1802   memset(state, 0, sizeof(*state));
1803   IWRC(fsm->pool.state(&fsm->pool, &state->exfile), rc);
1804   state->block_size = 1U << fsm->bpow;
1805   state->oflags = fsm->oflags;
1806   state->hdrlen = fsm->hdrlen;
1807   state->blocks_num = fsm->bmlen << 3;
1808   state->free_segments_num = fsm->fsmnum;
1809   state->avg_alloc_size = fsm->crznum > 0 ? (double_t) fsm->crzsum / (double_t) fsm->crznum : 0;
1810   state->alloc_dispersion = fsm->crznum > 0 ? (double_t) fsm->crzvar / (double_t) fsm->crznum : 0;
1811   IWRC(_fsm_ctrl_unlock(fsm), rc);
1812   return rc;
1813 }
1814 
/**
 * Open (or create) an FSM file and initialize the public `IWFS_FSM` vtable.
 *
 * @param f    Output interface struct; zeroed and populated by this call.
 * @param opts Open options; `opts->exfile.file.path` must be set.
 * @return 0 on success or an `iwrc` error code. On failure any partially
 *         constructed state is torn down before returning.
 */
iwrc iwfs_fsmfile_open(IWFS_FSM *f, const IWFS_FSM_OPTS *opts) {
  assert(f && opts);
  iwrc rc = 0;
  IWFS_EXT_STATE fstate = { 0 };
  const char *path = opts->exfile.file.path;

  memset(f, 0, sizeof(*f));
  // Ensure module-level init (error-code function registration) happened.
  RCC(rc, finish, iwfs_fsmfile_init());

  // Wire up the public method table before any fallible work so that
  // `_fsm_close()` in the error path operates on a fully formed interface.
  f->write = _fsm_write;
  f->read = _fsm_read;
  f->close = _fsm_close;
  f->sync = _fsm_sync;
  f->state = _fsm_state;

  f->ensure_size = _fsm_ensure_size;
  f->add_mmap = _fsm_add_mmap;
  f->remap_all = _fsm_remap_all;
  f->acquire_mmap = _fsm_acquire_mmap;
  f->probe_mmap = _fsm_probe_mmap;
  f->release_mmap = _fsm_release_mmap;
  f->remove_mmap = _fsm_remove_mmap;
  f->sync_mmap = _fsm_sync_mmap;

  f->allocate = _fsm_allocate;
  f->reallocate = _fsm_reallocate;
  f->deallocate = _fsm_deallocate;
  f->check_allocation_status = _fsm_check_allocation_status;
  f->writehdr = _fsm_writehdr;
  f->readhdr = _fsm_readhdr;
  f->clear = _fsm_clear;
  f->extfile = _fsm_extfile;

  if (!path) {
    return IW_ERROR_INVALID_ARGS;
  }
  struct fsm *fsm = f->impl = calloc(1, sizeof(*f->impl));
  if (!fsm) {
    return iwrc_set_errno(IW_ERROR_ALLOC, errno);
  }
  fsm->f = f;
  fsm->dlsnr = opts->exfile.file.dlsnr; // Copy data changes listener address
  fsm->mmap_opts = opts->mmap_opts;

  IWFS_EXT_OPTS rwl_opts = opts->exfile;
  // File-level locking is enabled unless the caller opted out via IWFSM_NOLOCKS.
  rwl_opts.use_locks = !(opts->oflags & IWFSM_NOLOCKS);

  RCC(rc, finish, _fsm_init_impl(fsm, opts));
  RCC(rc, finish, _fsm_init_locks(fsm, opts));
  RCC(rc, finish, iwfs_exfile_open(&fsm->pool, &rwl_opts));
  RCC(rc, finish, fsm->pool.state(&fsm->pool, &fstate));

  fsm->omode = fstate.file.opts.omode;

  // A brand-new file gets a fresh header/bitmap; an existing one is parsed.
  if (fstate.file.ostatus & IWFS_OPEN_NEW) {
    rc = _fsm_init_new_lw(fsm, opts);
  } else {
    rc = _fsm_init_existing_lw(fsm);
  }

finish:
  if (rc) {
    if (f->impl) {
      IWRC(_fsm_destroy_locks(f->impl), rc);  // we are not locked
      IWRC(_fsm_close(f), rc);
    }
  }
  return rc;
}
1884 
/**
 * Map FSM-specific error codes to human readable messages.
 * Returns 0 for codes outside the (_IWFS_FSM_ERROR_START, _IWFS_FSM_ERROR_END)
 * range so other registered ecode functions may handle them.
 */
static const char* _fsmfile_ecodefn(locale_t locale, uint32_t ecode) {
  if ((ecode <= _IWFS_FSM_ERROR_START) || (ecode >= _IWFS_FSM_ERROR_END)) {
    return 0;
  }
  switch (ecode) {
    case IWFS_ERROR_NO_FREE_SPACE:
      return "No free space. (IWFS_ERROR_NO_FREE_SPACE)";
    case IWFS_ERROR_INVALID_BLOCK_SIZE:
      return "Invalid block size specified. (IWFS_ERROR_INVALID_BLOCK_SIZE)";
    case IWFS_ERROR_RANGE_NOT_ALIGNED:
      return "Specified range/offset is not aligned with page/block. "
             "(IWFS_ERROR_RANGE_NOT_ALIGNED)";
    case IWFS_ERROR_FSM_SEGMENTATION:
      return "Free-space map segmentation error. (IWFS_ERROR_FSM_SEGMENTATION)";
    case IWFS_ERROR_INVALID_FILEMETA:
      return "Invalid file metadata. (IWFS_ERROR_INVALID_FILEMETA)";
    case IWFS_ERROR_PLATFORM_PAGE:
      return "The block size incompatible with platform page size, data "
             "migration required. (IWFS_ERROR_PLATFORM_PAGE)";
    case IWFS_ERROR_RESIZE_FAIL:
      return "Failed to resize file, "
             "conflicting with free-space map location (IWFS_ERROR_RESIZE_FAIL)";
    default:
      return 0;
  }
}
1912 
iwfs_fsmfile_init(void)1913 iwrc iwfs_fsmfile_init(void) {
1914   static int _fsmfile_initialized = 0;
1915   iwrc rc = iw_init();
1916   RCRET(rc);
1917   if (!__sync_bool_compare_and_swap(&_fsmfile_initialized, 0, 1)) {
1918     return 0;  // initialized already
1919   }
1920   return iwlog_register_ecodefn(_fsmfile_ecodefn);
1921 }
1922 
1923 /*************************************************************************************************
1924 *                                      Debug API                                                *
1925 *************************************************************************************************/
1926 
/** Debug: number of free-space segments currently tracked by the FSM. */
uint64_t iwfs_fsmdbg_number_of_free_areas(IWFS_FSM *f) {
  struct fsm *impl = f->impl;
  return impl->fsmnum;
}
1931 
/** Debug wrapper over the internal forward bit-scan routine. */
uint64_t iwfs_fsmdbg_find_next_set_bit(
  const uint64_t *addr, uint64_t offset_bit,
  uint64_t max_offset_bit, int *found) {
  return _fsm_find_next_set_bit(addr, offset_bit, max_offset_bit, found);
}
1938 
/** Debug wrapper over the internal backward bit-scan routine. */
uint64_t iwfs_fsmdbg_find_prev_set_bit(
  const uint64_t *addr, uint64_t offset_bit,
  uint64_t min_offset_bit, int *found) {
  return _fsm_find_prev_set_bit(addr, offset_bit, min_offset_bit, found);
}
1945 
/** Debug: dump the free-space AVL tree as [offset length] pairs to stderr. */
void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr) {
  assert(f);
  struct fsm *fsm = f->impl;
  fprintf(stderr, "FSM TREE: %s\n", hdr);
  if (!fsm->root) {
    fprintf(stderr, "NONE\n");
    return;
  }
  struct iwavl_node *node = iwavl_first_in_order(fsm->root);
  while (node) {
    struct bkey *bk = &BKEY(node);
    fprintf(stderr, "[%" PRIu64 " %" PRIu64 "]\n",
            (uint64_t) FSMBK_OFFSET(bk), (uint64_t) FSMBK_LENGTH(bk));
    node = iwavl_next_in_order(node);
  }
}
1961 
/**
 * Render the low 8 bits of `x` as a binary string, least significant bit
 * first (bit ordering used by the FSM bitmap dump).
 *
 * @return Pointer to an internal static buffer (not thread-safe; the result
 *         is overwritten by the next call).
 */
const char* byte_to_binary(int x) {
  static char b[9];
  int i = 0;
  // Fix: write digits directly instead of repeated strcat (which rescanned
  // the buffer on every iteration).
  for (int z = 1; z <= 128; z <<= 1) {
    b[i++] = (x & z) ? '1' : '0';
  }
  b[i] = '\0';
  return b;
}
1971 
iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM * f)1972 iwrc iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM *f) {
1973   assert(f);
1974   size_t sp;
1975   uint8_t *mm;
1976   struct fsm *fsm = f->impl;
1977   iwrc rc;
1978   if (fsm->mmap_all) {
1979     rc = fsm->pool.probe_mmap(&fsm->pool, 0, &mm, &sp);
1980     if (!rc) {
1981       if (sp <= fsm->bmoff) {
1982         rc = IWFS_ERROR_NOT_MMAPED;
1983       } else {
1984         mm += fsm->bmoff;
1985         sp = sp - fsm->bmoff;
1986       }
1987     }
1988   } else {
1989     rc = fsm->pool.probe_mmap(&fsm->pool, fsm->bmoff, &mm, &sp);
1990   }
1991   if (rc) {
1992     iwlog_ecode_error3(rc);
1993     return rc;
1994   }
1995   int i = ((fsm->hdrlen >> fsm->bpow) >> 3);
1996   // if (impl->bmoff == impl->aunit) {
1997   //   i += ((impl->bmlen >> impl->bpow) >> 3);
1998   // }
1999   for ( ; i < sp && i < fsm->bmlen; ++i) {
2000     uint8_t b = *(mm + i);
2001     fprintf(stderr, "%s", byte_to_binary(b));
2002   }
2003   printf("\n");
2004   return 0;
2005 }
2006 
iwfs_fsmdbg_state(IWFS_FSM * f,IWFS_FSMDBG_STATE * d)2007 iwrc iwfs_fsmdbg_state(IWFS_FSM *f, IWFS_FSMDBG_STATE *d) {
2008   FSM_ENSURE_OPEN2(f);
2009   struct fsm *fsm = f->impl;
2010   iwrc rc = _fsm_ctrl_rlock(fsm);
2011   memset(d, 0, sizeof(*d));
2012   IWRC(fsm->pool.state(&fsm->pool, &d->state.exfile), rc);
2013   d->state.block_size = 1U << fsm->bpow;
2014   d->state.oflags = fsm->oflags;
2015   d->state.hdrlen = fsm->hdrlen;
2016   d->state.blocks_num = fsm->bmlen << 3;
2017   d->state.free_segments_num = fsm->fsmnum;
2018   d->state.avg_alloc_size = fsm->crznum > 0 ? (double_t) fsm->crzsum / (double_t) fsm->crznum : 0;
2019   d->state.alloc_dispersion = fsm->crznum > 0 ? (double_t) fsm->crzvar / (double_t) fsm->crznum : 0;
2020   d->bmoff = fsm->bmoff;
2021   d->bmlen = fsm->bmlen;
2022   d->lfbkoff = fsm->lfbkoff;
2023   d->lfbklen = fsm->lfbklen;
2024   IWRC(_fsm_ctrl_unlock(fsm), rc);
2025   return rc;
2026 }
2027