1 /**************************************************************************************************
2 * IOWOW library
3 *
4 * MIT License
5 *
6 * Copyright (c) 2012-2020 Softmotions Ltd <info@softmotions.com>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in all
16 * copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *************************************************************************************************/
26
27 #include "iwcfg.h"
28 #include "platform/iwp.h"
29 #include "log/iwlog.h"
30 #include "iwfsmfile.h"
31 #include "utils/iwutils.h"
32 #include "utils/kbtree.h"
33 #include "utils/iwbits.h"
34
35 #include <pthread.h>
36
37 typedef struct IWFS_FSM_IMPL FSM;
38
39 void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr);
40
41 /**
42 * Free-space blocks-tree key.
43 */
44 typedef struct {
45 uint32_t off;
46 uint32_t len;
47 } FSMBK;
48
49 /** Additional options for `_fsm_set_bit_status_lw` routine */
50 typedef uint8_t fsm_bmopts_t;
51 /** No options. */
52 #define FSM_BM_NONE ((fsm_bmopts_t) 0x00U)
53 /** Do not modify bitmap. */
54 #define FSM_BM_DRY_RUN ((fsm_bmopts_t) 0x01U)
55 /** Perform strict checking of bitmap consistency */
56 #define FSM_BM_STRICT ((fsm_bmopts_t) 0x02U)
57
58 /* Maximum size of block: 1Mb */
59 #define FSM_MAX_BLOCK_POW 20
60
61 /* Maximum number of records used in allocation statistics */
62 #define FSM_MAX_STATS_COUNT 0x0000ffff
63
64 #define FSM_ENSURE_OPEN(impl_) \
65 if (!(impl_) || !(impl_)->f) return IW_ERROR_INVALID_STATE;
66
67 #define FSM_ENSURE_OPEN2(f_) \
68 if (!(f_) || !(f_)->impl) return IW_ERROR_INVALID_STATE;
69
70 #define FSMBK_OFFSET(b_) ((b_)->off)
71
72 #define FSMBK_LENGTH(b_) ((b_)->len)
73
74 ////////////////////////////////////////////////////////////////////////////////////////////////////
75
76 IW_INLINE int _fsm_cmp_ptr(const FSMBK *a, const FSMBK *b);
77 #define _fsm_cmp(a_, b_) (_fsm_cmp_ptr(&(a_), &(b_)))
78
79 // -V:KBTREE_INIT:522, 641
80 KBTREE_INIT(fsm, FSMBK, _fsm_cmp)
81
82 struct IWFS_FSM_IMPL {
83 IWFS_EXT pool; /**< Underlying rwl file. */
84 uint64_t bmlen; /**< Free-space bitmap block length in bytes. */
85 uint64_t bmoff; /**< Free-space bitmap block offset in bytes. */
86 uint64_t lfbkoff; /**< Offset in blocks of free block chunk with the largest offset. */
87 uint64_t lfbklen; /**< Length in blocks of free block chunk with the largest offset. */
88 uint64_t crzsum; /**< Cumulative sum all allocated blocks */
89 uint64_t crzvar; /**< Record sizes standard variance (deviation^2 * N) */
90 uint32_t hdrlen; /**< Length of custom file header */
91 uint32_t crznum; /**< Number of all allocated continuous areas acquired by `allocate` */
92 IWFS_FSM *f; /**< Self reference. */
93 IWDLSNR *dlsnr; /**< Data events listener */
94 kbtree_t(fsm) *fsm; /**< Free-space tree */
95 pthread_rwlock_t *ctlrwlk; /**< Methods RW lock */
96 size_t aunit; /**< System allocation unit size.
97 - Page size on *NIX
98 - Minimal allocation unit for WIN32 */
99 iwfs_fsm_openflags oflags; /**< Operation mode flags. */
100 iwfs_omode omode; /**< Open mode. */
101 uint8_t bpow; /**< Block size power for 2 */
102 bool mmap_all; /**< Mmap all file data */
103 iwfs_ext_mmap_opts_t mmap_opts; /**< Defaul mmap options used in `add_mmap` */
104 };
105
106 static iwrc _fsm_ensure_size_lw(FSM *impl, off_t size);
107
108 ////////////////////////////////////////////////////////////////////////////////////////////////////
109
_fsm_cmp_ptr(const FSMBK * a,const FSMBK * b)110 IW_INLINE int _fsm_cmp_ptr(const FSMBK *a, const FSMBK *b) {
111 int ret = ((FSMBK_LENGTH(b) < FSMBK_LENGTH(a)) - (FSMBK_LENGTH(a) < FSMBK_LENGTH(b)));
112 if (ret) {
113 return ret;
114 } else {
115 return ((FSMBK_OFFSET(b) < FSMBK_OFFSET(a)) - (FSMBK_OFFSET(a) < FSMBK_OFFSET(b)));
116 }
117 }
118
_fsm_ctrl_wlock(FSM * impl)119 IW_INLINE iwrc _fsm_ctrl_wlock(FSM *impl) {
120 int rci = impl->ctlrwlk ? pthread_rwlock_wrlock(impl->ctlrwlk) : 0;
121 return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
122 }
123
_fsm_ctrl_rlock(FSM * impl)124 IW_INLINE iwrc _fsm_ctrl_rlock(FSM *impl) {
125 int rci = impl->ctlrwlk ? pthread_rwlock_rdlock(impl->ctlrwlk) : 0;
126 return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
127 }
128
_fsm_ctrl_unlock(FSM * impl)129 IW_INLINE iwrc _fsm_ctrl_unlock(FSM *impl) {
130 int rci = impl->ctlrwlk ? pthread_rwlock_unlock(impl->ctlrwlk) : 0;
131 return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
132 }
133
_fsm_bmptr(FSM * impl,uint64_t ** bmptr)134 IW_INLINE iwrc _fsm_bmptr(FSM *impl, uint64_t **bmptr) {
135 size_t sp;
136 uint8_t *mm;
137 *bmptr = 0;
138 // get mmap pointer without locked
139 iwrc rc = impl->pool.probe_mmap(&impl->pool, impl->mmap_all ? 0 : impl->bmoff, &mm, &sp);
140 RCRET(rc);
141 if (impl->mmap_all) {
142 if (sp < impl->bmoff + impl->bmlen) {
143 return IWFS_ERROR_NOT_MMAPED;
144 }
145 *bmptr = (uint64_t *)(mm + impl->bmoff);
146 } else {
147 if (sp < impl->bmlen) {
148 return IWFS_ERROR_NOT_MMAPED;
149 }
150 *bmptr = (uint64_t *) mm;
151 }
152 return 0;
153 }
154
155 /**
156 * @brief Init the given @a bk key
157 * with given @a offset
158 * and @a length values.
159 */
_fsm_init_fbk(FSMBK * bk,uint64_t offset_blk,uint64_t len_blk)160 IW_INLINE WUR iwrc _fsm_init_fbk(FSMBK *bk, uint64_t offset_blk, uint64_t len_blk) {
161 if (offset_blk > ((uint32_t) - 1) ||
162 len_blk > ((uint32_t) - 1)) {
163 return IW_ERROR_OVERFLOW;
164 }
165 bk->off = (uint32_t) offset_blk;
166 bk->len = (uint32_t) len_blk;
167 return 0;
168 }
169
170 /**
171 * @brief Remove free space block from the fsm tree.
172 * @param impl `FSM`
173 * @param offset_blk Offset block number
174 * @param length_blk Number of blocks
175 */
_fsm_del_fbk(FSM * impl,uint64_t offset_blk,uint64_t length_blk)176 IW_INLINE iwrc _fsm_del_fbk(FSM *impl, uint64_t offset_blk, uint64_t length_blk) {
177 FSMBK fbk;
178 assert(length_blk);
179 iwrc rc = _fsm_init_fbk(&fbk, offset_blk, length_blk);
180 RCRET(rc);
181 #ifndef NDEBUG
182 int s2, s1 = kb_size(impl->fsm);
183 #endif
184 kb_delp(fsm, impl->fsm, &fbk);
185 #ifndef NDEBUG
186 s2 = kb_size(impl->fsm);
187 assert(s2 < s1);
188 #endif
189 if (FSMBK_OFFSET(&fbk) == impl->lfbkoff) {
190 impl->lfbkoff = 0;
191 impl->lfbklen = 0;
192 }
193 return 0;
194 }
195
196 /**
197 * @brief Deregister free-block chunk from the fsm tree.
198 * @param impl `FSM`
199 * @param fbk `FSMBK` Fsm tree key structure.
200 */
_fsm_del_fbk2(FSM * impl,const FSMBK fbk)201 IW_INLINE void _fsm_del_fbk2(FSM *impl, const FSMBK fbk) {
202 #ifndef NDEBUG
203 int s2, s1 = kb_size(impl->fsm);
204 #endif
205 kb_delp(fsm, impl->fsm, &fbk);
206 #ifndef NDEBUG
207 s2 = kb_size(impl->fsm);
208 assert(s2 < s1);
209 #endif
210 if (FSMBK_OFFSET(&fbk) == impl->lfbkoff) {
211 impl->lfbkoff = 0;
212 impl->lfbklen = 0;
213 }
214 }
215
216 /**
217 * @brief Register free space block in the fsm tree.
218 * @param impl `FSM`
219 * @param offset_blk Offset block number
220 * @param length_blk Number of blocks
221 */
_fsm_put_fbk(FSM * impl,uint64_t offset_blk,uint64_t length_blk)222 IW_INLINE iwrc _fsm_put_fbk(FSM *impl, uint64_t offset_blk, uint64_t length_blk) {
223 FSMBK fbk;
224 assert(length_blk);
225 iwrc rc = _fsm_init_fbk(&fbk, offset_blk, length_blk);
226 RCRET(rc);
227 kb_putp(fsm, impl->fsm, &fbk);
228 if (offset_blk + length_blk >= impl->lfbkoff + impl->lfbklen) {
229 impl->lfbkoff = offset_blk;
230 impl->lfbklen = length_blk;
231 }
232 return 0;
233 }
234
235 /**
236 * @brief Get the nearest free-space block.
237 * @param impl `FSM`
238 * @param offset_blk Desired offset in number of blocks.
239 * @param length_blk Desired free area size specified in blocks.
240 * @param opts Allocation opts
241 * @return `0` if matching block is not found.
242 */
_fsm_find_matching_fblock_lw(FSM * impl,uint64_t offset_blk,uint64_t length_blk,iwfs_fsm_aflags opts)243 IW_INLINE FSMBK *_fsm_find_matching_fblock_lw(FSM *impl,
244 uint64_t offset_blk,
245 uint64_t length_blk,
246 iwfs_fsm_aflags opts) {
247 FSMBK k, *uk, *lk;
248 iwrc rc = _fsm_init_fbk(&k, offset_blk, length_blk);
249 if (rc) {
250 iwlog_ecode_error3(rc);
251 return 0;
252 }
253
254 kb_intervalp(fsm, impl->fsm, &k, &lk, &uk);
255
256 uint64_t lklength = lk ? FSMBK_LENGTH(lk) : 0;
257 uint64_t uklength = uk ? FSMBK_LENGTH(uk) : 0;
258 if (lklength == length_blk) {
259 return lk;
260 } else if (uklength == length_blk) {
261 return uk;
262 }
263 if (lklength > uklength) {
264 return (lklength > length_blk) ? lk : 0;
265 } else {
266 return (uklength > length_blk) ? uk : 0;
267 }
268 }
269
270 /**
271 * @brief Set the allocation bits in the fsm bitmap.
272 *
273 * @param impl
274 * @param offset_bits Bit offset in the bitmap.
275 * @param length_bits Number of bits to set
276 * @param bit_status If `1` bits will be set to `1` otherwise `0`
277 * @param opts Operation options
278 */
_fsm_set_bit_status_lw(FSM * impl,const uint64_t offset_bits,const uint64_t length_bits_,const int bit_status,const fsm_bmopts_t opts)279 static iwrc _fsm_set_bit_status_lw(FSM *impl,
280 const uint64_t offset_bits,
281 const uint64_t length_bits_,
282 const int bit_status,
283 const fsm_bmopts_t opts) {
284 iwrc rc;
285 size_t sp;
286 uint8_t *mm;
287 register int64_t length_bits = length_bits_;
288 register uint64_t *p, set_mask;
289 uint64_t bend = offset_bits + length_bits;
290 int set_bits;
291
292 if (bend < offset_bits) { // overflow
293 return IW_ERROR_OUT_OF_BOUNDS;
294 }
295 assert(impl->bmlen * 8 >= offset_bits + length_bits);
296 if (impl->bmlen * 8 < offset_bits + length_bits) {
297 return IWFS_ERROR_FSM_SEGMENTATION;
298 }
299 if (impl->mmap_all) {
300 rc = impl->pool.probe_mmap(&impl->pool, 0, &mm, &sp);
301 RCRET(rc);
302 if (sp < impl->bmoff + impl->bmlen) {
303 return IWFS_ERROR_NOT_MMAPED;
304 } else {
305 mm += impl->bmoff;
306 }
307 } else {
308 rc = impl->pool.probe_mmap(&impl->pool, impl->bmoff, &mm, &sp);
309 RCRET(rc);
310 if (sp < impl->bmlen) {
311 return IWFS_ERROR_NOT_MMAPED;
312 }
313 }
314 p = ((uint64_t *) mm) + offset_bits / 64;
315 set_bits = 64 - (offset_bits & (64 - 1)); // NOLINT
316 set_mask = (~((uint64_t) 0) << (offset_bits & (64 - 1)));
317
318 #ifdef IW_BIGENDIAN
319 while (length_bits - set_bits >= 0) {
320 uint64_t pv = *p;
321 pv = IW_ITOHLL(pv);
322 if (bit_status) {
323 if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
324 rc = IWFS_ERROR_FSM_SEGMENTATION;
325 }
326 if ((opts & FSM_BM_DRY_RUN) == 0) {
327 pv |= set_mask;
328 *p = IW_HTOILL(pv);
329 }
330 } else {
331 if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
332 rc = IWFS_ERROR_FSM_SEGMENTATION;
333 }
334 if ((opts & FSM_BM_DRY_RUN) == 0) {
335 pv &= ~set_mask;
336 *p = IW_HTOILL(pv);
337 }
338 }
339 length_bits -= set_bits;
340 set_bits = 64;
341 set_mask = ~((uint64_t) 0);
342 ++p;
343 }
344 if (length_bits) {
345 uint64_t pv = *p;
346 pv = IW_ITOHLL(pv);
347 set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
348 if (bit_status) {
349 if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
350 rc = IWFS_ERROR_FSM_SEGMENTATION;
351 }
352 if ((opts & FSM_BM_DRY_RUN) == 0) {
353 pv |= set_mask;
354 *p = IW_HTOILL(pv);
355 }
356 } else {
357 if ((opts & FSM_BM_STRICT) && (pv & set_mask) != set_mask) {
358 rc = IWFS_ERROR_FSM_SEGMENTATION;
359 }
360 if ((opts & FSM_BM_DRY_RUN) == 0) {
361 pv &= ~set_mask;
362 *p = IW_HTOILL(pv);
363 }
364 }
365 }
366 #else
367 while (length_bits - set_bits >= 0) {
368 if (bit_status) {
369 if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
370 rc = IWFS_ERROR_FSM_SEGMENTATION;
371 }
372 if ((opts & FSM_BM_DRY_RUN) == 0) {
373 *p |= set_mask;
374 }
375 } else {
376 if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
377 rc = IWFS_ERROR_FSM_SEGMENTATION;
378 }
379 if ((opts & FSM_BM_DRY_RUN) == 0) {
380 *p &= ~set_mask;
381 }
382 }
383 length_bits -= set_bits;
384 set_bits = 64;
385 set_mask = ~((uint64_t) 0);
386 ++p;
387 }
388 if (length_bits) {
389 set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
390 if (bit_status) {
391 if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
392 rc = IWFS_ERROR_FSM_SEGMENTATION;
393 }
394 if ((opts & FSM_BM_DRY_RUN) == 0) {
395 *p |= set_mask;
396 }
397 } else {
398 if ((opts & FSM_BM_STRICT) && (*p & set_mask) != set_mask) {
399 rc = IWFS_ERROR_FSM_SEGMENTATION;
400 }
401 if ((opts & FSM_BM_DRY_RUN) == 0) {
402 *p &= ~set_mask;
403 }
404 }
405 }
406 #endif
407 if (!rc && impl->dlsnr) {
408 uint64_t so = offset_bits / 8;
409 uint64_t lb = length_bits_ + offset_bits % 8;
410 uint64_t dl = lb / 8;
411 if (lb % 8) ++dl;
412 rc = impl->dlsnr->onwrite(impl->dlsnr, impl->bmoff + so, mm + so, dl, 0);
413 }
414 return rc;
415 }
416
417 /**
418 * @brief Allocate a continuous segment of blocks with page aligned offset.
419 *
420 * @param impl `FSM`
421 * @param length_blk Desired segment length in blocks.
422 * @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
423 It also specified the desired segment offset to provide
424 * allocation locality.
425 * @param [out] olength_blk Assigned segment length in blocks.
426 * @param max_offset_blk Maximal offset of allocated block.
427 * @param opts Allocation options.
428 */
_fsm_blk_allocate_aligned_lw(FSM * impl,const uint64_t length_blk,uint64_t * offset_blk,uint64_t * olength_blk,const uint64_t max_offset_blk,const iwfs_fsm_aflags opts)429 static iwrc _fsm_blk_allocate_aligned_lw(FSM *impl,
430 const uint64_t length_blk,
431 uint64_t *offset_blk,
432 uint64_t *olength_blk,
433 const uint64_t max_offset_blk,
434 const iwfs_fsm_aflags opts) {
435 FSMBK *nk;
436 fsm_bmopts_t bopts = FSM_BM_NONE;
437 size_t aunit_blk = (impl->aunit >> impl->bpow);
438
439 assert(impl && impl->fsm && length_blk > 0);
440 if (impl->oflags & IWFSM_STRICT) {
441 bopts |= FSM_BM_STRICT;
442 }
443 *olength_blk = 0;
444 *offset_blk = 0;
445
446 /* First attempt */
447 nk = _fsm_find_matching_fblock_lw(impl, 0, length_blk + aunit_blk, opts);
448 if (!nk) {
449 nk = _fsm_find_matching_fblock_lw(impl, 0, length_blk, opts);
450 if (!nk) {
451 return IWFS_ERROR_NO_FREE_SPACE;
452 }
453 }
454 uint64_t akoff = FSMBK_OFFSET(nk);
455 uint64_t aklen = FSMBK_LENGTH(nk);
456 uint64_t noff = IW_ROUNDUP(akoff, aunit_blk);
457
458 if (noff <= max_offset_blk && (noff < aklen + akoff) && (aklen - (noff - akoff) >= length_blk)) {
459 _fsm_del_fbk(impl, akoff, aklen);
460 aklen = aklen - (noff - akoff);
461 if (noff > akoff) {
462 _fsm_put_fbk(impl, akoff, noff - akoff);
463 }
464 if (aklen > length_blk) {
465 _fsm_put_fbk(impl, noff + length_blk, aklen - length_blk);
466 }
467 *offset_blk = noff;
468 *olength_blk = length_blk;
469 return _fsm_set_bit_status_lw(impl, noff, length_blk, 1, bopts);
470 }
471
472 aklen = 0;
473 akoff = UINT64_MAX;
474 /* full scan */
475 #define _fsm_traverse(k) \
476 { \
477 uint64_t koff = FSMBK_OFFSET(k); \
478 uint64_t klen = FSMBK_LENGTH(k); \
479 if (koff < akoff) { \
480 noff = IW_ROUNDUP(koff, aunit_blk); \
481 if (noff <= max_offset_blk && (noff < klen + koff) && (klen - (noff - koff) >= length_blk)) { \
482 akoff = koff; \
483 aklen = klen; \
484 } \
485 } \
486 }
487 //-V:__kb_traverse:576, 701, 619, 769, 522
488 __kb_traverse(FSMBK, impl->fsm, _fsm_traverse);
489 #undef _fsm_traverse
490
491 if (akoff == UINT64_MAX) {
492 return IWFS_ERROR_NO_FREE_SPACE;
493 }
494 _fsm_del_fbk(impl, akoff, aklen);
495 noff = IW_ROUNDUP(akoff, aunit_blk);
496 aklen = aklen - (noff - akoff);
497 if (noff > akoff) {
498 _fsm_put_fbk(impl, akoff, noff - akoff);
499 }
500 if (aklen > length_blk) {
501 _fsm_put_fbk(impl, noff + length_blk, aklen - length_blk);
502 }
503 *offset_blk = noff;
504 *olength_blk = length_blk;
505 return _fsm_set_bit_status_lw(impl, noff, length_blk, 1, bopts);
506 }
507
508 /**
509 * @brief Load existing bitmap area into free-space search tree.
510 * @param impl `FSM`
511 * @param bm Bitmap area start ptr
512 * @param len Bitmap area length in bytes.
513 */
_fsm_load_fsm_lw(FSM * impl,const uint8_t * bm,uint64_t len)514 static void _fsm_load_fsm_lw(FSM *impl, const uint8_t *bm, uint64_t len) {
515 uint64_t cbnum = 0, fbklength = 0, fbkoffset = 0;
516 if (impl->fsm) {
517 // -V:kb_destroy:701, 769
518 kb_destroy(fsm, impl->fsm);
519 }
520 impl->fsm = kb_init(fsm, KB_DEFAULT_SIZE);
521 for (uint64_t b = 0; b < len; ++b) {
522 register uint8_t bb = bm[b];
523 if (bb == 0) {
524 fbklength += 8;
525 cbnum += 8;
526 } else if (bb == 0xffU) {
527 if (fbklength) {
528 fbkoffset = cbnum - fbklength;
529 _fsm_put_fbk(impl, fbkoffset, fbklength);
530 fbklength = 0;
531 }
532 cbnum += 8;
533 } else {
534 for (int i = 0; i < 8; ++i, ++cbnum) {
535 if (bb & (1U << i)) {
536 if (fbklength) {
537 fbkoffset = cbnum - fbklength;
538 _fsm_put_fbk(impl, fbkoffset, fbklength);
539 fbklength = 0;
540 }
541 } else {
542 ++fbklength;
543 }
544 }
545 }
546 }
547 if (fbklength > 0) {
548 fbkoffset = len * 8 - fbklength;
549 _fsm_put_fbk(impl, fbkoffset, fbklength);
550 }
551 }
552
553 /**
554 * @brief Flush a current `iwfsmfile` metadata into the file header.
555 * @param impl
556 * @param is_sync If `1` perform mmap sync.
557 * @return
558 */
_fsm_write_meta_lw(FSM * impl)559 static iwrc _fsm_write_meta_lw(FSM *impl) {
560 uint64_t llv;
561 size_t wlen;
562 uint32_t sp = 0, lv;
563 uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = {0};
564
565 /*
566 [FSM_CTL_MAGICK u32][block pow u8]
567 [bmoffset u64][bmlength u64]
568 [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
569 [custom header size u32][custom header data...]
570 [fsm data...]
571 */
572
573 /* magic */
574 lv = IW_HTOIL(IWFSM_MAGICK);
575 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
576 memcpy(hdr + sp, &lv, sizeof(lv));
577 sp += sizeof(lv);
578
579 /* block pow */
580 static_assert(sizeof(impl->bpow) == 1, "sizeof(impl->bpow) == 1");
581 assert(sp + sizeof(impl->bpow) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
582 memcpy(hdr + sp, &impl->bpow, sizeof(impl->bpow));
583 sp += sizeof(impl->bpow);
584
585 /* fsm bitmap block offset */
586 llv = impl->bmoff;
587 llv = IW_HTOILL(llv);
588 assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
589 memcpy(hdr + sp, &llv, sizeof(llv));
590 sp += sizeof(llv);
591
592 /* fsm bitmap block length */
593 llv = impl->bmlen;
594 llv = IW_HTOILL(llv);
595 assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
596 memcpy(hdr + sp, &llv, sizeof(llv));
597 sp += sizeof(llv);
598
599 /* Cumulative sum of record sizes acquired by `allocate` */
600 llv = impl->crzsum;
601 llv = IW_HTOILL(llv);
602 assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
603 memcpy(hdr + sp, &llv, sizeof(llv));
604 sp += sizeof(llv);
605
606 /* Cumulative number of records acquired by `allocated` */
607 lv = impl->crznum;
608 lv = IW_HTOIL(lv);
609 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
610 memcpy(hdr + sp, &lv, sizeof(lv));
611 sp += sizeof(lv);
612
613 /* Record sizes standard variance (deviation^2 * N) */
614 llv = impl->crzvar;
615 llv = IW_HTOILL(llv);
616 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
617 memcpy(hdr + sp, &llv, sizeof(llv));
618 sp += sizeof(llv);
619
620 /* Reserved */
621 sp += 32;
622
623 /* Size of header */
624 lv = impl->hdrlen;
625 lv = IW_HTOIL(lv);
626 assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
627 memcpy(hdr + sp, &lv, sizeof(lv));
628 sp += sizeof(lv);
629
630 assert(sp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
631 return impl->pool.write(&impl->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &wlen);
632 }
633
634 /**
635 * @brief Search for the first next set bit position
636 * starting from the specified offset bit (INCLUDED).
637 */
_fsm_find_next_set_bit(const uint64_t * addr,register uint64_t offset_bit,const uint64_t max_offset_bit,int * found)638 static uint64_t _fsm_find_next_set_bit(const uint64_t *addr,
639 register uint64_t offset_bit,
640 const uint64_t max_offset_bit,
641 int *found) {
642 *found = 0;
643 register uint64_t bit, size;
644 register const uint64_t *p = addr + offset_bit / 64;
645 if (offset_bit >= max_offset_bit) {
646 return 0;
647 }
648 bit = offset_bit & (64 - 1);
649 offset_bit -= bit;
650 size = max_offset_bit - offset_bit;
651
652 #ifdef IW_BIGENDIAN
653 uint64_t pv = *p;
654 if (bit) {
655 pv = IW_ITOHLL(pv) & (~((uint64_t) 0) << bit);
656 if (pv) {
657 pv = iwbits_find_first_sbit64(pv);
658 if (pv >= size) {
659 return 0;
660 } else {
661 *found = 1;
662 return offset_bit + pv;
663 }
664 }
665 if (size <= 64) {
666 return 0;
667 }
668 offset_bit += 64;
669 size -= 64;
670 ++p;
671 }
672 while (size & ~(64 - 1)) {
673 pv = *(p++);
674 if (pv) {
675 *found = 1;
676 return offset_bit + iwbits_find_first_sbit64(IW_ITOHLL(pv));
677 }
678 offset_bit += 64;
679 size -= 64;
680 }
681 if (!size) {
682 return 0;
683 }
684 pv = *p;
685 pv = IW_ITOHLL(pv) & (~((uint64_t) 0) >> (64 - size));
686 if (pv) {
687 *found = 1;
688 return offset_bit + iwbits_find_first_sbit64(pv);
689 } else {
690 return 0;
691 }
692 #else
693 register uint64_t tmp;
694 if (bit) {
695 tmp = *p & (~((uint64_t) 0) << bit);
696 if (tmp) {
697 tmp = iwbits_find_first_sbit64(tmp);
698 if (tmp >= size) {
699 return 0;
700 } else {
701 *found = 1;
702 return offset_bit + tmp;
703 }
704 }
705 if (size <= 64) {
706 return 0;
707 }
708 offset_bit += 64;
709 size -= 64;
710 ++p;
711 }
712 while (size & ~(64 - 1)) {
713 if ((tmp = *(p++))) {
714 *found = 1;
715 return offset_bit + iwbits_find_first_sbit64(tmp);
716 }
717 offset_bit += 64;
718 size -= 64;
719 }
720 if (!size) {
721 return 0;
722 }
723 tmp = (*p) & (~((uint64_t) 0) >> (64 - size));
724 if (tmp) {
725 *found = 1;
726 return offset_bit + iwbits_find_first_sbit64(tmp);
727 } else {
728 return 0;
729 }
730 #endif
731 }
732
733 /**
734 * @brief Search for the first previous set bit position
735 * starting from the specified offset_bit (EXCLUDED).
736 */
_fsm_find_prev_set_bit(const uint64_t * addr,register uint64_t offset_bit,const uint64_t min_offset_bit,int * found)737 static uint64_t _fsm_find_prev_set_bit(const uint64_t *addr,
738 register uint64_t offset_bit,
739 const uint64_t min_offset_bit,
740 int *found) {
741 register const uint64_t *p;
742 register uint64_t tmp, bit, size;
743 *found = 0;
744 if (min_offset_bit >= offset_bit) {
745 return 0;
746 }
747 size = offset_bit - min_offset_bit;
748 bit = offset_bit & (64 - 1);
749 p = addr + offset_bit / 64;
750
751 #ifdef IW_BIGENDIAN
752 uint64_t pv;
753 if (bit) {
754 pv = *p;
755 pv = (iwbits_reverse_64(IW_ITOHLL(pv)) >> (64 - bit));
756 if (pv) {
757 pv = iwbits_find_first_sbit64(pv);
758 if (pv >= size) {
759 return 0;
760 } else {
761 *found = 1;
762 assert(offset_bit > pv);
763 return offset_bit > pv ? offset_bit - pv - 1 : 0;
764 }
765 }
766 offset_bit -= bit;
767 size -= bit;
768 }
769 while (size & ~(64 - 1)) {
770 if (*(--p)) {
771 pv = *p;
772 *found = 1;
773 tmp = iwbits_find_first_sbit64(iwbits_reverse_64(IW_ITOHLL(pv)));
774 assert(offset_bit > tmp);
775 return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
776 }
777 offset_bit -= 64;
778 size -= 64;
779 }
780 if (size == 0) {
781 return 0;
782 }
783 pv = *(--p);
784 tmp = iwbits_reverse_64(IW_ITOHLL(pv)) & ((((uint64_t) 1) << size) - 1);
785 #else
786 if (bit) {
787 tmp = (iwbits_reverse_64(*p) >> (64 - bit));
788 if (tmp) {
789 tmp = iwbits_find_first_sbit64(tmp);
790 if (tmp >= size) {
791 return 0;
792 } else {
793 *found = 1;
794 assert(offset_bit > tmp);
795 return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
796 }
797 }
798 offset_bit -= bit;
799 size -= bit;
800 }
801 while (size & ~(64 - 1)) {
802 if (*(--p)) {
803 *found = 1;
804 tmp = iwbits_find_first_sbit64(iwbits_reverse_64(*p));
805 assert(offset_bit > tmp);
806 return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
807 }
808 offset_bit -= 64;
809 size -= 64;
810 }
811 if (size == 0) {
812 return 0;
813 }
814 tmp = iwbits_reverse_64(*(--p)) & ((((uint64_t) 1) << size) - 1);
815 #endif
816 if (tmp) {
817 uint64_t tmp2;
818 *found = 1;
819 tmp2 = iwbits_find_first_sbit64(tmp);
820 assert(offset_bit > tmp2);
821 return offset_bit > tmp2 ? offset_bit - tmp2 - 1 : 0;
822 } else {
823 return 0;
824 }
825 }
826
827 /**
828 * @brief Return a previously allocated blocks
829 * back into the free-blocks pool.
830 *
831 * @param impl `FSM`
832 * @param offset_blk Starting block number of the specified range.
833 * @param length_blk Range size in blocks.
834 */
_fsm_blk_deallocate_lw(FSM * impl,const uint64_t offset_blk,const uint64_t length_blk)835 static iwrc _fsm_blk_deallocate_lw(FSM *impl,
836 const uint64_t offset_blk,
837 const uint64_t length_blk) {
838 iwrc rc;
839 uint64_t *bmptr;
840 uint64_t left, right;
841 int hasleft = 0, hasright = 0;
842 uint64_t key_offset = offset_blk, key_length = length_blk;
843 uint64_t rm_offset = 0, rm_length = 0;
844 uint64_t lfbkoff = impl->lfbkoff;
845 uint64_t end_offset_blk = offset_blk + length_blk;
846 fsm_bmopts_t bopts = FSM_BM_NONE;
847
848
849 if (impl->oflags & IWFSM_STRICT) {
850 bopts |= FSM_BM_STRICT;
851 }
852 rc = _fsm_set_bit_status_lw(impl, offset_blk, length_blk, 0, bopts);
853 RCRET(rc);
854
855 rc = _fsm_bmptr(impl, &bmptr);
856 RCRET(rc);
857
858 /* Merge with neighborhoods */
859 left = _fsm_find_prev_set_bit(bmptr, offset_blk, 0, &hasleft);
860 if (lfbkoff && lfbkoff == end_offset_blk) {
861 right = lfbkoff + impl->lfbklen;
862 hasright = 1;
863 } else {
864 uint64_t maxoff = lfbkoff ? lfbkoff : (impl->bmlen << 3);
865 right = _fsm_find_next_set_bit(bmptr, end_offset_blk, maxoff, &hasright);
866 }
867
868 if (hasleft) {
869 if (offset_blk > left + 1) {
870 left += 1;
871 rm_offset = left;
872 rm_length = offset_blk - left;
873 IWRC(_fsm_del_fbk(impl, rm_offset, rm_length), rc);
874 key_offset = rm_offset;
875 key_length += rm_length;
876 }
877 } else if (offset_blk > 0) { /* zero start */
878 rm_offset = 0;
879 rm_length = offset_blk;
880 IWRC(_fsm_del_fbk(impl, rm_offset, rm_length), rc);
881 key_offset = rm_offset;
882 key_length += rm_length;
883 }
884 if (hasright && right > end_offset_blk) {
885 rm_offset = end_offset_blk;
886 rm_length = right - end_offset_blk;
887 _fsm_del_fbk(impl, rm_offset, rm_length);
888 key_length += rm_length;
889 }
890 IWRC(_fsm_put_fbk(impl, key_offset, key_length), rc);
891 return rc;
892 }
893
894 /**
895 * @brief Initialize a new free-space bitmap area.
896 *
897 * If bitmap exists, its content will be moved into newly created area.
898 * Blocks from the previous bitmap are will disposed and deallocated.
899 *
900 * @param impl `FSM`
901 * @param bmoff Byte offset of the new bitmap. Value must be page aligned.
902 * @param bmlen Byte length of the new bitmap. Value must be page aligned.
903 Its length must not be lesser than length of old bitmap.
904 */
_fsm_init_lw(FSM * impl,uint64_t bmoff,uint64_t bmlen)905 static iwrc _fsm_init_lw(FSM *impl, uint64_t bmoff, uint64_t bmlen) {
906 iwrc rc;
907 uint8_t *mm, *mm2;
908 size_t sp, sp2;
909 uint64_t old_bmoff, old_bmlen;
910 IWFS_EXT *pool = &impl->pool;
911
912 if ((bmlen & ((1U << impl->bpow) - 1)) || (bmoff & ((1U << impl->bpow) - 1)) || (bmoff & (impl->aunit - 1))) {
913 return IWFS_ERROR_RANGE_NOT_ALIGNED;
914 }
915 if (bmlen < impl->bmlen) {
916 rc = IW_ERROR_INVALID_ARGS;
917 iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
918 " must not be lesser than the current bitmap area length %" PRIu64 "",
919 bmlen, impl->bmlen);
920 return rc;
921 }
922 if (bmlen * 8 < ((bmoff + bmlen) >> impl->bpow) + 1) {
923 rc = IW_ERROR_INVALID_ARGS;
924 iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
925 " is not enough to handle bitmap itself and the file header area.",
926 bmlen);
927 return rc;
928 }
929 rc = _fsm_ensure_size_lw(impl, bmoff + bmlen);
930 RCRET(rc);
931 if (impl->mmap_all) {
932 // get mmap area without locking, since we ensured what pool file will not be remapped
933 rc = pool->probe_mmap(pool, 0, &mm, &sp);
934 RCRET(rc);
935 if (sp < bmoff + bmlen) {
936 return IWFS_ERROR_NOT_MMAPED;
937 } else {
938 mm += bmoff;
939 }
940 } else {
941 // get mmap area without locking, since we ensured what pool file will not be remapped
942 rc = pool->probe_mmap(pool, bmoff, &mm, &sp);
943 RCRET(rc);
944 if (sp < bmlen) {
945 return IWFS_ERROR_NOT_MMAPED;
946 }
947 }
948 if (impl->bmlen) {
949 /* We have an old active bitmap. Lets copy its content to the new location.*/
950 if (IW_RANGES_OVERLAP(impl->bmoff, impl->bmoff + impl->bmlen, bmoff, bmoff + bmlen)) {
951 iwlog_ecode_error2(rc, "New and old bitmap areas are overlaped");
952 return IW_ERROR_INVALID_ARGS;
953 }
954 if (impl->mmap_all) {
955 mm2 = mm - bmoff + impl->bmoff;
956 } else {
957 rc = pool->probe_mmap(pool, impl->bmoff, &mm2, &sp2);
958 if (!rc && sp2 < impl->bmlen) {
959 rc = IWFS_ERROR_NOT_MMAPED;
960 }
961 if (rc) {
962 iwlog_ecode_error2(rc, "Old bitmap area is not mmaped");
963 return rc;
964 }
965 }
966 assert(!((impl->bmlen - bmlen) & ((1U << impl->bpow) - 1)));
967 if (impl->dlsnr) {
968 rc = impl->dlsnr->onwrite(impl->dlsnr, bmoff, mm2, impl->bmlen, 0);
969 RCRET(rc);
970 }
971 memcpy(mm, mm2, impl->bmlen);
972 if (bmlen > impl->bmlen) {
973 memset(mm + impl->bmlen, 0, bmlen - impl->bmlen);
974 if (impl->dlsnr) {
975 rc = impl->dlsnr->onset(impl->dlsnr, bmoff + impl->bmlen, 0, bmlen - impl->bmlen, 0);
976 RCRET(rc);
977 }
978 }
979 } else {
980 mm2 = 0;
981 memset(mm, 0, bmlen);
982 if (impl->dlsnr) {
983 rc = impl->dlsnr->onset(impl->dlsnr, bmoff, 0, bmlen, 0);
984 RCRET(rc);
985 }
986 }
987
988 /* Backup the previous bitmap range */
989 old_bmlen = impl->bmlen;
990 old_bmoff = impl->bmoff;
991 impl->bmoff = bmoff;
992 impl->bmlen = bmlen;
993
994 rc = _fsm_set_bit_status_lw(impl, (bmoff >> impl->bpow), (bmlen >> impl->bpow), 1, FSM_BM_NONE);
995 RCGO(rc, rollback);
996 if (!old_bmlen) { /* First time initialization */
997 /* Header allocation */
998 rc = _fsm_set_bit_status_lw(impl, 0, (impl->hdrlen >> impl->bpow), 1, FSM_BM_NONE);
999 RCGO(rc, rollback);
1000 }
1001
1002 /* Reload fsm tree */
1003 _fsm_load_fsm_lw(impl, mm, bmlen);
1004
1005 /* Flush new meta */
1006 rc = _fsm_write_meta_lw(impl);
1007 RCGO(rc, rollback);
1008
1009 rc = pool->sync(pool, IWFS_FDATASYNC);
1010 RCGO(rc, rollback);
1011
1012 if (old_bmlen) {
1013 /* Now we are save to deallocate the old bitmap */
1014 rc = _fsm_blk_deallocate_lw(impl, (old_bmoff >> impl->bpow), (old_bmlen >> impl->bpow));
1015 if (!impl->mmap_all) {
1016 pool->remove_mmap(pool, old_bmoff);
1017 }
1018 }
1019 return rc;
1020
1021 rollback: /* try to rollback previous bitmap state */
1022 impl->bmoff = old_bmoff;
1023 impl->bmlen = old_bmlen;
1024 if (old_bmlen && mm2) {
1025 _fsm_load_fsm_lw(impl, mm2, old_bmlen);
1026 }
1027 pool->sync(pool, IWFS_FDATASYNC);
1028 return rc;
1029 }
1030
1031 /**
1032 * @brief Resize bitmap area.
1033 * @param impl `FSM`
1034 * @param size New size of bitmap area in bytes.
1035 */
_fsm_resize_fsm_bitmap_lw(FSM * impl,uint64_t size)1036 static iwrc _fsm_resize_fsm_bitmap_lw(FSM *impl, uint64_t size) {
1037 iwrc rc;
1038 uint64_t bmoffset = 0, bmlen, sp;
1039 IWFS_EXT *pool = &impl->pool;
1040
1041 if (impl->bmlen >= size) {
1042 return 0;
1043 }
1044 bmlen = IW_ROUNDUP(size, impl->aunit); /* align to the system page size. */
1045 rc = _fsm_blk_allocate_aligned_lw(
1046 impl, (bmlen >> impl->bpow), &bmoffset, &sp, UINT64_MAX,
1047 IWFSM_ALLOC_NO_STATS | IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE);
1048 if (!rc) {
1049 bmoffset = bmoffset << impl->bpow;
1050 bmlen = sp << impl->bpow;
1051 } else if (rc == IWFS_ERROR_NO_FREE_SPACE) {
1052 bmoffset = impl->bmlen * (1 << impl->bpow) * 8;
1053 bmoffset = IW_ROUNDUP(bmoffset, impl->aunit);
1054 }
1055 if (!impl->mmap_all) {
1056 rc = pool->add_mmap(pool, bmoffset, bmlen, impl->mmap_opts);
1057 RCRET(rc);
1058 }
1059 rc = _fsm_init_lw(impl, bmoffset, bmlen);
1060 if (rc && !impl->mmap_all) {
1061 pool->remove_mmap(pool, bmoffset);
1062 }
1063 return rc;
1064 }
1065
1066 /**
1067 * @brief Allocate a continuous segment of blocks.
1068 *
1069 * @param impl `FSM`
1070 * @param length_blk Desired segment length in blocks.
1071 * @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
1072 * It also specified the desired segment offset to provide allocation locality.
1073 * @param [out] olength_blk Assigned segment length in blocks.
1074 * @param opts
1075 */
_fsm_blk_allocate_lw(FSM * impl,uint64_t length_blk,uint64_t * offset_blk,uint64_t * olength_blk,iwfs_fsm_aflags opts)1076 static iwrc _fsm_blk_allocate_lw(FSM *impl,
1077 uint64_t length_blk,
1078 uint64_t *offset_blk,
1079 uint64_t *olength_blk,
1080 iwfs_fsm_aflags opts) {
1081 iwrc rc;
1082 FSMBK *nk;
1083 fsm_bmopts_t bopts = FSM_BM_NONE;
1084
1085 if (opts & IWFSM_ALLOC_PAGE_ALIGNED) {
1086 while (1) {
1087 rc = _fsm_blk_allocate_aligned_lw(impl, length_blk, offset_blk, olength_blk, UINT64_MAX, opts);
1088 if (rc == IWFS_ERROR_NO_FREE_SPACE) {
1089 if (opts & IWFSM_ALLOC_NO_EXTEND) {
1090 return IWFS_ERROR_NO_FREE_SPACE;
1091 }
1092 rc = _fsm_resize_fsm_bitmap_lw(impl, impl->bmlen << 1);
1093 RCRET(rc);
1094 continue;
1095 }
1096 if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
1097 uint64_t bs = *offset_blk;
1098 int64_t bl = *olength_blk;
1099 rc = _fsm_ensure_size_lw(impl, (bs << impl->bpow) + (bl << impl->bpow));
1100 }
1101 return rc;
1102 }
1103 }
1104
1105 *olength_blk = length_blk;
1106
1107 start:
1108 nk = _fsm_find_matching_fblock_lw(impl, *offset_blk, length_blk, opts);
1109 if (nk) { /* use existing free space block */
1110 uint64_t nlength = FSMBK_LENGTH(nk);
1111 *offset_blk = FSMBK_OFFSET(nk);
1112 assert(kb_get(fsm, impl->fsm, *nk));
1113
1114 _fsm_del_fbk2(impl, *nk);
1115
1116 if (nlength > length_blk) { /* re-save rest of free-space */
1117 if (!(opts & IWFSM_ALLOC_NO_OVERALLOCATE) && impl->crznum) {
1118 /* todo use lognormal distribution? */
1119 double_t d = ((double_t) impl->crzsum / (double_t) impl->crznum) /*avg*/
1120 - (nlength - length_blk); /*rest blk size*/
1121 double_t s = ((double_t) impl->crzvar / (double_t) impl->crznum) * 6.0; /* blk size dispersion * 6 */
1122 if (s > 1 && d > 0 && d * d > s) { /* its better to attach rest of block to the record */
1123 *olength_blk = nlength;
1124 } else {
1125 _fsm_put_fbk(impl, (*offset_blk + length_blk), (nlength - length_blk));
1126 }
1127 } else {
1128 _fsm_put_fbk(impl, (*offset_blk + length_blk), (nlength - length_blk));
1129 }
1130 }
1131 } else {
1132 if (opts & IWFSM_ALLOC_NO_EXTEND) {
1133 return IWFS_ERROR_NO_FREE_SPACE;
1134 }
1135 rc = _fsm_resize_fsm_bitmap_lw(impl, impl->bmlen << 1);
1136 RCRET(rc);
1137 goto start;
1138 }
1139
1140 if (impl->oflags & IWFSM_STRICT) {
1141 bopts |= FSM_BM_STRICT;
1142 }
1143 rc = _fsm_set_bit_status_lw(impl, *offset_blk, *olength_blk, 1, bopts);
1144 if (!rc && !(opts & IWFSM_ALLOC_NO_STATS)) {
1145 double_t avg;
1146 /* Update allocation statistics */
1147 if (impl->crznum > FSM_MAX_STATS_COUNT) {
1148 impl->crznum = 0;
1149 impl->crzsum = 0;
1150 impl->crzvar = 0;
1151 }
1152 ++impl->crznum;
1153 impl->crzsum += length_blk;
1154 avg = (double_t) impl->crzsum / (double_t) impl->crznum; /* average */
1155 impl->crzvar +=
1156 (uint64_t)(((double_t) length_blk - avg) * ((double_t) length_blk - avg) + 0.5L); /* variance */
1157 }
1158 if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
1159 uint64_t bs = *offset_blk;
1160 int64_t bl = *olength_blk;
1161 rc = _fsm_ensure_size_lw(impl, (bs << impl->bpow) + (bl << impl->bpow));
1162 }
1163 if (!rc && (opts & IWFSM_SYNC_BMAP)) {
1164 uint64_t *bmptr;
1165 if (!_fsm_bmptr(impl, &bmptr)) {
1166 IWFS_EXT *pool = &impl->pool;
1167 rc = pool->sync_mmap(pool, impl->bmoff, IWFS_SYNCDEFAULT);
1168 }
1169 }
1170 return rc;
1171 }
1172
1173 /**
1174 * @brief Remove all free blocks from the and of file and trim its size.
1175 */
_fsm_trim_tail_lw(FSM * impl)1176 static iwrc _fsm_trim_tail_lw(FSM *impl) {
1177 iwrc rc;
1178 int hasleft;
1179 uint64_t length, lastblk, *bmptr;
1180 IWFS_EXT_STATE fstate;
1181 uint64_t offset = 0;
1182
1183 if (!(impl->omode & IWFS_OWRITE)) {
1184 return 0;
1185 }
1186 /* find free space for fsm with lesser offset than actual */
1187 rc = _fsm_blk_allocate_aligned_lw(
1188 impl, (impl->bmlen >> impl->bpow), &offset, &length, (impl->bmoff >> impl->bpow),
1189 IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE | IWFSM_ALLOC_NO_STATS);
1190
1191 if (rc && rc != IWFS_ERROR_NO_FREE_SPACE) {
1192 return rc;
1193 }
1194 if (rc) {
1195 rc = 0;
1196 } else if ((offset << impl->bpow) < impl->bmoff) {
1197 offset = offset << impl->bpow;
1198 length = length << impl->bpow;
1199 assert(offset != impl->bmoff);
1200 impl->pool.add_mmap(&impl->pool, offset, length, impl->mmap_opts);
1201 rc = _fsm_init_lw(impl, offset, length);
1202 RCGO(rc, finish);
1203 } else {
1204 /* shoud never be reached */
1205 assert(0);
1206 rc = _fsm_blk_deallocate_lw(impl, offset, length);
1207 RCGO(rc, finish);
1208 }
1209
1210 rc = _fsm_bmptr(impl, &bmptr); // -V519
1211 RCGO(rc, finish);
1212
1213 lastblk = (impl->bmoff + impl->bmlen) >> impl->bpow;
1214 offset = _fsm_find_prev_set_bit(bmptr, (impl->bmlen << 3), lastblk, &hasleft);
1215 if (hasleft) {
1216 lastblk = offset + 1;
1217 }
1218 rc = impl->pool.state(&impl->pool, &fstate);
1219 if (!rc && fstate.fsize > (lastblk << impl->bpow)) {
1220 rc = impl->pool.truncate(&impl->pool, lastblk << impl->bpow);
1221 }
1222
1223 finish:
1224 return rc;
1225 }
1226
_fsm_init_impl(FSM * impl,const IWFS_FSM_OPTS * opts)1227 static iwrc _fsm_init_impl(FSM *impl, const IWFS_FSM_OPTS *opts) {
1228 impl->oflags = opts->oflags;
1229 impl->aunit = iwp_alloc_unit();
1230 impl->bpow = opts->bpow;
1231 impl->mmap_all = opts->mmap_all;
1232 if (!impl->bpow) {
1233 impl->bpow = 6; // 64bit block
1234 } else if (impl->bpow > FSM_MAX_BLOCK_POW) {
1235 return IWFS_ERROR_INVALID_BLOCK_SIZE;
1236 } else if ((1U << impl->bpow) > impl->aunit) {
1237 return IWFS_ERROR_PLATFORM_PAGE;
1238 }
1239 return 0;
1240 }
1241
_fsm_init_locks(FSM * impl,const IWFS_FSM_OPTS * opts)1242 static iwrc _fsm_init_locks(FSM *impl, const IWFS_FSM_OPTS *opts) {
1243 if (opts->oflags & IWFSM_NOLOCKS) {
1244 impl->ctlrwlk = 0;
1245 return 0;
1246 }
1247 impl->ctlrwlk = calloc(1, sizeof(*impl->ctlrwlk));
1248 if (!impl->ctlrwlk) {
1249 return iwrc_set_errno(IW_ERROR_ALLOC, errno);
1250 }
1251 int rci = pthread_rwlock_init(impl->ctlrwlk, 0);
1252 if (rci) {
1253 free(impl->ctlrwlk);
1254 impl->ctlrwlk = 0;
1255 return iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci);
1256 }
1257 return 0;
1258 }
1259
_fsm_destroy_locks(FSM * impl)1260 static iwrc _fsm_destroy_locks(FSM *impl) {
1261 if (!impl->ctlrwlk) {
1262 return 0;
1263 }
1264 iwrc rc = 0;
1265 int rci = pthread_rwlock_destroy(impl->ctlrwlk);
1266 if (rci) {
1267 IWRC(iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci), rc);
1268 }
1269 free(impl->ctlrwlk);
1270 impl->ctlrwlk = 0;
1271 return rc;
1272 }
1273
_fsm_read_meta_lr(FSM * impl)1274 static iwrc _fsm_read_meta_lr(FSM *impl) {
1275 iwrc rc;
1276 uint32_t lv;
1277 uint64_t llv;
1278 size_t sp, rp = 0;
1279 uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = {0};
1280
1281 /*
1282 [FSM_CTL_MAGICK u32][block pow u8]
1283 [bmoffset u64][bmlength u64]
1284 [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
1285 [custom header size u32][custom header data...]
1286 [fsm data...]
1287 */
1288
1289 rc = impl->pool.read(&impl->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &sp);
1290 if (rc) {
1291 iwlog_ecode_error3(rc);
1292 return rc;
1293 }
1294
1295 /* Magic */
1296 memcpy(&lv, hdr + rp, sizeof(lv)); // -V512
1297 lv = IW_ITOHL(lv);
1298 if (lv != IWFSM_MAGICK) {
1299 rc = IWFS_ERROR_INVALID_FILEMETA;
1300 iwlog_ecode_error2(rc, "Invalid file magic number");
1301 return rc;
1302 }
1303 rp += sizeof(lv);
1304
1305 /* Block pow */
1306 memcpy(&impl->bpow, hdr + rp, sizeof(impl->bpow));
1307 rp += sizeof(impl->bpow);
1308
1309 if (impl->bpow > FSM_MAX_BLOCK_POW) {
1310 rc = IWFS_ERROR_INVALID_FILEMETA;
1311 iwlog_ecode_error(rc, "Invalid file blocks pow: %u", impl->bpow);
1312 return rc;
1313 }
1314 if ((1U << impl->bpow) > impl->aunit) {
1315 rc = IWFS_ERROR_PLATFORM_PAGE;
1316 iwlog_ecode_error(rc, "Block size: %u must not be greater than system page size: %zu",
1317 (1U << impl->bpow), impl->aunit);
1318 }
1319
1320 /* Free-space bitmap offset */
1321 memcpy(&llv, hdr + rp, sizeof(llv));
1322 llv = IW_ITOHLL(llv);
1323 impl->bmoff = llv;
1324 rp += sizeof(llv);
1325
1326 /* Free-space bitmap length */
1327 memcpy(&llv, hdr + rp, sizeof(llv));
1328 llv = IW_ITOHLL(llv);
1329 impl->bmlen = llv;
1330 if (llv & (64 - 1)) {
1331 rc = IWFS_ERROR_INVALID_FILEMETA;
1332 iwlog_ecode_error(rc, "Free-space bitmap length is not 64bit aligned: %" PRIuMAX "", impl->bmlen);
1333 }
1334 rp += sizeof(llv);
1335
1336 /* Cumulative sum of record sizes acquired by `allocate` */
1337 memcpy(&llv, hdr + rp, sizeof(llv));
1338 llv = IW_ITOHLL(llv);
1339 impl->crzsum = llv;
1340 rp += sizeof(llv);
1341
1342 /* Cumulative number of records acquired by `allocated` */
1343 memcpy(&lv, hdr + rp, sizeof(lv));
1344 lv = IW_ITOHL(lv);
1345 impl->crznum = lv;
1346 rp += sizeof(lv);
1347
1348 /* Record sizes standard variance (deviation^2 * N) */
1349 memcpy(&llv, hdr + rp, sizeof(llv));
1350 llv = IW_ITOHLL(llv);
1351 impl->crzvar = llv;
1352 rp += sizeof(llv);
1353
1354 /* Reserved */
1355 rp += 32;
1356
1357 /* Header size */
1358 memcpy(&lv, hdr + rp, sizeof(lv));
1359 lv = IW_ITOHL(lv);
1360 impl->hdrlen = lv;
1361 rp += sizeof(lv);
1362
1363 assert(rp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
1364 return rc;
1365 }
1366
_fsm_init_new_lw(FSM * impl,const IWFS_FSM_OPTS * opts)1367 static iwrc _fsm_init_new_lw(FSM *impl, const IWFS_FSM_OPTS *opts) {
1368 FSM_ENSURE_OPEN(impl);
1369 iwrc rc;
1370 uint64_t bmlen, bmoff;
1371 IWFS_EXT *pool = &impl->pool;
1372 assert(impl->aunit && impl->bpow);
1373
1374 impl->hdrlen = opts->hdrlen + IWFSM_CUSTOM_HDR_DATA_OFFSET;
1375 impl->hdrlen = IW_ROUNDUP(impl->hdrlen, 1ULL << impl->bpow);
1376 bmlen = opts->bmlen > 0 ? IW_ROUNDUP(opts->bmlen, impl->aunit) : impl->aunit;
1377 bmoff = IW_ROUNDUP(impl->hdrlen, impl->aunit);
1378
1379 if (impl->mmap_all) {
1380 /* mmap whole file */
1381 rc = pool->add_mmap(pool, 0, SIZE_T_MAX, impl->mmap_opts);
1382 RCRET(rc);
1383 } else {
1384 /* mmap header */
1385 rc = pool->add_mmap(pool, 0, impl->hdrlen, impl->mmap_opts);
1386 RCRET(rc);
1387 /* mmap the fsm bitmap index */
1388 rc = pool->add_mmap(pool, bmoff, bmlen, impl->mmap_opts);
1389 RCRET(rc);
1390 }
1391 return _fsm_init_lw(impl, bmoff, bmlen);
1392 }
1393
_fsm_init_existing_lw(FSM * impl)1394 static iwrc _fsm_init_existing_lw(FSM *impl) {
1395 FSM_ENSURE_OPEN(impl);
1396 iwrc rc;
1397 size_t sp;
1398 uint8_t *mm;
1399 IWFS_EXT *pool = &impl->pool;
1400
1401 rc = _fsm_read_meta_lr(impl);
1402 RCGO(rc, finish);
1403
1404 if (impl->mmap_all) {
1405 /* mmap the whole file */
1406 rc = pool->add_mmap(pool, 0, SIZE_T_MAX, impl->mmap_opts);
1407 RCGO(rc, finish);
1408 rc = pool->probe_mmap(pool, 0, &mm, &sp);
1409 RCGO(rc, finish);
1410 if (sp < impl->bmoff + impl->bmlen) {
1411 rc = IWFS_ERROR_NOT_MMAPED;
1412 goto finish;
1413 } else {
1414 mm += impl->bmoff;
1415 }
1416 } else {
1417 /* mmap the header of file */
1418 rc = pool->add_mmap(pool, 0, impl->hdrlen, impl->mmap_opts);
1419 RCGO(rc, finish);
1420 /* mmap the fsm bitmap index */
1421 rc = pool->add_mmap(pool, impl->bmoff, impl->bmlen, impl->mmap_opts);
1422 RCGO(rc, finish);
1423 rc = pool->probe_mmap(pool, impl->bmoff, &mm, &sp);
1424 RCGO(rc, finish);
1425 if (sp < impl->bmlen) {
1426 rc = IWFS_ERROR_NOT_MMAPED;
1427 goto finish;
1428 }
1429 }
1430
1431 _fsm_load_fsm_lw(impl, mm, impl->bmlen);
1432
1433 finish:
1434 return rc;
1435 }
1436
1437 /**
1438 * @brief Check if all blocks within the specified range have been `allocated`.
1439 *
1440 * @param impl `FSM`
1441 * @param offset_blk Starting block number of the specified range.
1442 * @param length_blk Range size in blocks.
1443 * @param [out] ret Checking result.
1444 */
_fsm_is_fully_allocated_lr(FSM * impl,uint64_t offset_blk,uint64_t length_blk,int * ret)1445 static iwrc _fsm_is_fully_allocated_lr(FSM *impl, uint64_t offset_blk, uint64_t length_blk, int *ret) {
1446 uint64_t end = offset_blk + length_blk;
1447 *ret = 1;
1448 if (length_blk < 1 || end < offset_blk || end > (impl->bmlen << 3)) {
1449 *ret = 0;
1450 return 0;
1451 }
1452 iwrc rc = _fsm_set_bit_status_lw(impl, offset_blk, length_blk, 0, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1453 if (rc == IWFS_ERROR_FSM_SEGMENTATION) {
1454 *ret = 0;
1455 return 0;
1456 }
1457 return rc;
1458 }
1459
1460 /*************************************************************************************************
1461 * Public API *
1462 *************************************************************************************************/
1463
_fsm_write(struct IWFS_FSM * f,off_t off,const void * buf,size_t siz,size_t * sp)1464 static iwrc _fsm_write(struct IWFS_FSM *f, off_t off, const void *buf, size_t siz, size_t *sp) {
1465 FSM_ENSURE_OPEN2(f);
1466 FSM *impl = f->impl;
1467 iwrc rc = _fsm_ctrl_rlock(impl);
1468 RCRET(rc);
1469 if (impl->oflags & IWFSM_STRICT) {
1470 int allocated = 0;
1471 IWRC(_fsm_is_fully_allocated_lr(impl,
1472 (uint64_t) off >> impl->bpow,
1473 IW_ROUNDUP(siz, 1ULL << impl->bpow) >> impl->bpow,
1474 &allocated), rc);
1475 if (!rc) {
1476 if (!allocated) {
1477 rc = IWFS_ERROR_FSM_SEGMENTATION;
1478 } else {
1479 rc = impl->pool.write(&impl->pool, off, buf, siz, sp);
1480 }
1481 }
1482 } else {
1483 rc = impl->pool.write(&impl->pool, off, buf, siz, sp);
1484 }
1485 _fsm_ctrl_unlock(impl);
1486 return rc;
1487 }
1488
_fsm_read(struct IWFS_FSM * f,off_t off,void * buf,size_t siz,size_t * sp)1489 static iwrc _fsm_read(struct IWFS_FSM *f, off_t off, void *buf, size_t siz, size_t *sp) {
1490 FSM_ENSURE_OPEN2(f);
1491 FSM *impl = f->impl;
1492 iwrc rc = _fsm_ctrl_rlock(impl);
1493 RCRET(rc);
1494 if (impl->oflags & IWFSM_STRICT) {
1495 int allocated = 0;
1496 IWRC(_fsm_is_fully_allocated_lr(impl, (uint64_t) off >> impl->bpow,
1497 IW_ROUNDUP(siz, 1ULL << impl->bpow) >> impl->bpow,
1498 &allocated), rc);
1499 if (!rc) {
1500 if (!allocated) {
1501 rc = IWFS_ERROR_FSM_SEGMENTATION;
1502 } else {
1503 rc = impl->pool.read(&impl->pool, off, buf, siz, sp);
1504 }
1505 }
1506 } else {
1507 rc = impl->pool.read(&impl->pool, off, buf, siz, sp);
1508 }
1509 _fsm_ctrl_unlock(impl);
1510 return rc;
1511 }
1512
_fsm_sync(struct IWFS_FSM * f,iwfs_sync_flags flags)1513 static iwrc _fsm_sync(struct IWFS_FSM *f, iwfs_sync_flags flags) {
1514 FSM_ENSURE_OPEN2(f);
1515 iwrc rc = _fsm_ctrl_rlock(f->impl);
1516 RCRET(rc);
1517 IWRC(_fsm_write_meta_lw(f->impl), rc);
1518 IWRC(f->impl->pool.sync(&f->impl->pool, flags), rc);
1519 IWRC(_fsm_ctrl_unlock(f->impl), rc);
1520 return rc;
1521 }
1522
_fsm_close(struct IWFS_FSM * f)1523 static iwrc _fsm_close(struct IWFS_FSM *f) {
1524 if (!f || !f->impl) {
1525 return 0;
1526 }
1527 FSM *impl = f->impl;
1528 iwrc rc = 0;
1529 IWRC(_fsm_ctrl_wlock(impl), rc);
1530 if (impl->fsm && (impl->omode & IWFS_OWRITE)) {
1531 if (!(impl->oflags & IWFSM_NO_TRIM_ON_CLOSE)) {
1532 IWRC(_fsm_trim_tail_lw(impl), rc);
1533 }
1534 IWRC(_fsm_write_meta_lw(impl), rc);
1535 if (!impl->dlsnr) {
1536 IWRC(impl->pool.sync(&impl->pool, IWFS_SYNCDEFAULT), rc);
1537 }
1538 }
1539 IWRC(impl->pool.close(&impl->pool), rc);
1540 if (impl->fsm) {
1541 // -V:__kb_destroy:701, 769
1542 __kb_destroy(impl->fsm);
1543 }
1544 IWRC(_fsm_ctrl_unlock(impl), rc);
1545 IWRC(_fsm_destroy_locks(impl), rc);
1546 impl->f->impl = 0;
1547 impl->f = 0;
1548 free(impl);
1549 return rc;
1550 }
1551
_fsm_ensure_size_lw(FSM * impl,off_t size)1552 IW_INLINE iwrc _fsm_ensure_size_lw(FSM *impl, off_t size) {
1553 return impl->pool.ensure_size(&impl->pool, size);
1554 }
1555
_fsm_ensure_size(struct IWFS_FSM * f,off_t size)1556 static iwrc _fsm_ensure_size(struct IWFS_FSM *f, off_t size) {
1557 FSM_ENSURE_OPEN2(f);
1558 iwrc rc = _fsm_ctrl_rlock(f->impl);
1559 RCRET(rc);
1560 if (f->impl->bmoff + f->impl->bmlen > size) {
1561 rc = IWFS_ERROR_RESIZE_FAIL;
1562 goto finish;
1563 }
1564 rc = _fsm_ensure_size_lw(f->impl, size);
1565
1566 finish:
1567 IWRC(_fsm_ctrl_unlock(f->impl), rc);
1568 return rc;
1569 }
1570
_fsm_add_mmap(struct IWFS_FSM * f,off_t off,size_t maxlen,iwfs_ext_mmap_opts_t opts)1571 static iwrc _fsm_add_mmap(struct IWFS_FSM *f, off_t off, size_t maxlen, iwfs_ext_mmap_opts_t opts) {
1572 FSM_ENSURE_OPEN2(f);
1573 return f->impl->pool.add_mmap(&f->impl->pool, off, maxlen, opts);
1574 }
1575
_fsm_remap_all(struct IWFS_FSM * f)1576 static iwrc _fsm_remap_all(struct IWFS_FSM *f) {
1577 FSM_ENSURE_OPEN2(f);
1578 return f->impl->pool.remap_all(&f->impl->pool);
1579 }
1580
_fsm_acquire_mmap(struct IWFS_FSM * f,off_t off,uint8_t ** mm,size_t * sp)1581 iwrc _fsm_acquire_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1582 return f->impl->pool.acquire_mmap(&f->impl->pool, off, mm, sp);
1583 }
1584
_fsm_release_mmap(struct IWFS_FSM * f)1585 iwrc _fsm_release_mmap(struct IWFS_FSM *f) {
1586 return f->impl->pool.release_mmap(&f->impl->pool);
1587 }
1588
_fsm_probe_mmap(struct IWFS_FSM * f,off_t off,uint8_t ** mm,size_t * sp)1589 static iwrc _fsm_probe_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1590 FSM_ENSURE_OPEN2(f);
1591 return f->impl->pool.probe_mmap(&f->impl->pool, off, mm, sp);
1592 }
1593
_fsm_remove_mmap(struct IWFS_FSM * f,off_t off)1594 static iwrc _fsm_remove_mmap(struct IWFS_FSM *f, off_t off) {
1595 FSM_ENSURE_OPEN2(f);
1596 return f->impl->pool.remove_mmap(&f->impl->pool, off);
1597 }
1598
_fsm_sync_mmap(struct IWFS_FSM * f,off_t off,iwfs_sync_flags flags)1599 static iwrc _fsm_sync_mmap(struct IWFS_FSM *f, off_t off, iwfs_sync_flags flags) {
1600 FSM_ENSURE_OPEN2(f);
1601 return f->impl->pool.sync_mmap(&f->impl->pool, off, flags);
1602 }
1603
_fsm_allocate(struct IWFS_FSM * f,off_t len,off_t * oaddr,off_t * olen,iwfs_fsm_aflags opts)1604 static iwrc _fsm_allocate(struct IWFS_FSM *f, off_t len, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
1605 FSM_ENSURE_OPEN2(f);
1606 iwrc rc;
1607 uint64_t sbnum, nlen;
1608 FSM *impl = f->impl;
1609
1610 *olen = 0;
1611 if (!(impl->omode & IWFS_OWRITE)) {
1612 return IW_ERROR_READONLY;
1613 }
1614 if (len <= 0) {
1615 return IW_ERROR_INVALID_ARGS;
1616 }
1617 /* Required blocks number */
1618 sbnum = (uint64_t) * oaddr >> impl->bpow;
1619 len = IW_ROUNDUP(len, 1ULL << impl->bpow);
1620
1621 rc = _fsm_ctrl_wlock(impl);
1622 RCRET(rc);
1623 rc = _fsm_blk_allocate_lw(f->impl, (uint64_t) len >> impl->bpow, &sbnum, &nlen, opts);
1624 if (!rc) {
1625 *olen = (nlen << impl->bpow);
1626 *oaddr = (sbnum << impl->bpow);
1627 }
1628 IWRC(_fsm_ctrl_unlock(impl), rc);
1629 return rc;
1630 }
1631
_fsm_reallocate(struct IWFS_FSM * f,off_t nlen,off_t * oaddr,off_t * olen,iwfs_fsm_aflags opts)1632 static iwrc _fsm_reallocate(struct IWFS_FSM *f, off_t nlen, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
1633 FSM_ENSURE_OPEN2(f);
1634 iwrc rc;
1635 FSM *impl = f->impl;
1636
1637 if (!(impl->omode & IWFS_OWRITE)) {
1638 return IW_ERROR_READONLY;
1639 }
1640 if ((*oaddr & ((1ULL << impl->bpow) - 1)) || (*olen & ((1ULL << impl->bpow) - 1))) {
1641 return IWFS_ERROR_RANGE_NOT_ALIGNED;
1642 }
1643 uint64_t sp;
1644 uint64_t nlen_blk = IW_ROUNDUP((uint64_t) nlen, 1ULL << impl->bpow) >> impl->bpow;
1645 uint64_t olen_blk = (uint64_t) * olen >> impl->bpow;
1646 uint64_t oaddr_blk = (uint64_t) * oaddr >> impl->bpow;
1647 uint64_t naddr_blk = oaddr_blk;
1648
1649 if (nlen_blk == olen_blk) {
1650 return 0;
1651 }
1652 rc = _fsm_ctrl_wlock(impl);
1653 RCRET(rc);
1654 if (nlen_blk < olen_blk) {
1655 rc = _fsm_blk_deallocate_lw(impl, oaddr_blk + nlen_blk, olen_blk - nlen_blk);
1656 if (!rc) {
1657 *oaddr = oaddr_blk << impl->bpow;
1658 *olen = nlen_blk << impl->bpow;
1659 }
1660 } else {
1661 rc = _fsm_blk_allocate_lw(impl, nlen_blk, &naddr_blk, &sp, opts);
1662 RCGO(rc, finish);
1663 if (naddr_blk != oaddr_blk) {
1664 rc = impl->pool.copy(&impl->pool, *oaddr, (size_t) * olen, naddr_blk << impl->bpow);
1665 RCGO(rc, finish);
1666 }
1667 rc = _fsm_blk_deallocate_lw(impl, oaddr_blk, olen_blk);
1668 RCGO(rc, finish);
1669 *oaddr = naddr_blk << impl->bpow;
1670 *olen = sp << impl->bpow;
1671 }
1672
1673 finish:
1674 IWRC(_fsm_ctrl_unlock(impl), rc);
1675 return rc;
1676 }
1677
_fsm_deallocate(struct IWFS_FSM * f,off_t addr,off_t len)1678 static iwrc _fsm_deallocate(struct IWFS_FSM *f, off_t addr, off_t len) {
1679 FSM_ENSURE_OPEN2(f);
1680 iwrc rc;
1681 FSM *impl = f->impl;
1682 off_t offset_blk = (uint64_t) addr >> impl->bpow;
1683 off_t length_blk = (uint64_t) len >> impl->bpow;
1684
1685 if (!(impl->omode & IWFS_OWRITE)) {
1686 return IW_ERROR_READONLY;
1687 }
1688 if (addr & ((1ULL << impl->bpow) - 1)) {
1689 return IWFS_ERROR_RANGE_NOT_ALIGNED;
1690 }
1691 rc = _fsm_ctrl_wlock(impl);
1692 RCRET(rc);
1693 if (IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (impl->hdrlen >> impl->bpow)) ||
1694 IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (impl->bmoff >> impl->bpow),
1695 (impl->bmoff >> impl->bpow) + (impl->bmlen >> impl->bpow))) {
1696 // Deny deallocations in header or free-space bitmap itself
1697 IWRC(_fsm_ctrl_unlock(impl), rc);
1698 return IWFS_ERROR_FSM_SEGMENTATION;
1699 }
1700 rc = _fsm_blk_deallocate_lw(impl, (uint64_t) offset_blk, (uint64_t) length_blk);
1701 IWRC(_fsm_ctrl_unlock(impl), rc);
1702 return rc;
1703 }
1704
_fsm_check_allocation_status(struct IWFS_FSM * f,off_t addr,off_t len,bool allocated)1705 static iwrc _fsm_check_allocation_status(struct IWFS_FSM *f, off_t addr, off_t len, bool allocated) {
1706 FSM *impl = f->impl;
1707 if ((addr & ((1ULL << impl->bpow) - 1)) || (len & ((1ULL << impl->bpow) - 1))) {
1708 return IWFS_ERROR_RANGE_NOT_ALIGNED;
1709 }
1710 iwrc rc = _fsm_ctrl_rlock(impl);
1711 RCRET(rc);
1712 off_t offset_blk = (uint64_t) addr >> impl->bpow;
1713 off_t length_blk = (uint64_t) len >> impl->bpow;
1714 if (IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (impl->hdrlen >> impl->bpow)) ||
1715 IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (impl->bmoff >> impl->bpow),
1716 (impl->bmoff >> impl->bpow) + (impl->bmlen >> impl->bpow))) {
1717 IWRC(_fsm_ctrl_unlock(impl), rc);
1718 return IWFS_ERROR_FSM_SEGMENTATION;
1719 }
1720 rc = _fsm_set_bit_status_lw(impl, (uint64_t) offset_blk, (uint64_t) length_blk,
1721 allocated ? 0 : 1, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1722 IWRC(_fsm_ctrl_unlock(impl), rc);
1723 return rc;
1724 }
1725
_fsm_writehdr(struct IWFS_FSM * f,off_t off,const void * buf,off_t siz)1726 static iwrc _fsm_writehdr(struct IWFS_FSM *f, off_t off, const void *buf, off_t siz) {
1727 FSM_ENSURE_OPEN2(f);
1728 iwrc rc;
1729 uint8_t *mm;
1730 if (siz < 1) return 0;
1731 FSM *impl = f->impl;
1732 if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > impl->hdrlen) {
1733 return IW_ERROR_OUT_OF_BOUNDS;
1734 }
1735 rc = impl->pool.acquire_mmap(&impl->pool, 0, &mm, 0);
1736 if (!rc) {
1737 if (impl->dlsnr) {
1738 rc = impl->dlsnr->onwrite(impl->dlsnr, IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, siz, 0);
1739 }
1740 memmove(mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, (size_t) siz);
1741 IWRC(impl->pool.release_mmap(&impl->pool), rc);
1742 }
1743 return rc;
1744 }
1745
_fsm_readhdr(struct IWFS_FSM * f,off_t off,void * buf,off_t siz)1746 static iwrc _fsm_readhdr(struct IWFS_FSM *f, off_t off, void *buf, off_t siz) {
1747 FSM_ENSURE_OPEN2(f);
1748 iwrc rc;
1749 uint8_t *mm;
1750 if (siz < 1) return 0;
1751 FSM *impl = f->impl;
1752 if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > impl->hdrlen) {
1753 return IW_ERROR_OUT_OF_BOUNDS;
1754 }
1755 rc = impl->pool.acquire_mmap(&impl->pool, 0, &mm, 0);
1756 if (!rc) {
1757 memmove(buf, mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, (size_t) siz);
1758 rc = impl->pool.release_mmap(&impl->pool);
1759 }
1760 return rc;
1761 }
1762
_fsm_clear(struct IWFS_FSM * f,iwfs_fsm_clrfalgs clrflags)1763 static iwrc _fsm_clear(struct IWFS_FSM *f, iwfs_fsm_clrfalgs clrflags) {
1764 FSM_ENSURE_OPEN2(f);
1765 FSM *impl = f->impl;
1766 uint64_t bmoff, bmlen;
1767 iwrc rc = _fsm_ctrl_wlock(impl);
1768 bmlen = impl->bmlen;
1769 if (!bmlen) {
1770 goto finish;
1771 }
1772 if (!impl->mmap_all && impl->bmoff) {
1773 IWRC(impl->pool.remove_mmap(&impl->pool, impl->bmoff), rc);
1774 }
1775 bmoff = IW_ROUNDUP(impl->hdrlen, impl->aunit);
1776 if (!impl->mmap_all) {
1777 IWRC(impl->pool.add_mmap(&impl->pool, bmoff, bmlen, impl->mmap_opts), rc);
1778 }
1779 RCGO(rc, finish);
1780 impl->bmlen = 0;
1781 impl->bmoff = 0;
1782 rc = _fsm_init_lw(impl, bmoff, bmlen);
1783 if (!rc && (clrflags & IWFSM_CLEAR_TRIM)) {
1784 rc = _fsm_trim_tail_lw(impl);
1785 }
1786
1787 finish:
1788 IWRC(_fsm_ctrl_unlock(impl), rc);
1789 return rc;
1790 }
1791
_fsm_extfile(struct IWFS_FSM * f,IWFS_EXT ** ext)1792 static iwrc _fsm_extfile(struct IWFS_FSM *f, IWFS_EXT **ext) {
1793 FSM_ENSURE_OPEN2(f);
1794 *ext = &f->impl->pool;
1795 return 0;
1796 }
1797
_fsm_state(struct IWFS_FSM * f,IWFS_FSM_STATE * state)1798 static iwrc _fsm_state(struct IWFS_FSM *f, IWFS_FSM_STATE *state) {
1799 FSM_ENSURE_OPEN2(f);
1800 FSM *impl = f->impl;
1801 iwrc rc = _fsm_ctrl_rlock(impl);
1802 memset(state, 0, sizeof(*state));
1803 IWRC(impl->pool.state(&impl->pool, &state->exfile), rc);
1804 state->block_size = 1U << impl->bpow;
1805 state->oflags = impl->oflags;
1806 state->hdrlen = impl->hdrlen;
1807 state->blocks_num = impl->bmlen << 3;
1808 state->free_segments_num = (uint64_t) kb_size(impl->fsm);
1809 state->avg_alloc_size = impl->crznum > 0 ? (double_t) impl->crzsum / (double_t) impl->crznum : 0;
1810 state->alloc_dispersion = impl->crznum > 0 ? (double_t) impl->crzvar / (double_t) impl->crznum : 0;
1811 IWRC(_fsm_ctrl_unlock(impl), rc);
1812 return rc;
1813 }
1814
iwfs_fsmfile_open(IWFS_FSM * f,const IWFS_FSM_OPTS * opts)1815 iwrc iwfs_fsmfile_open(IWFS_FSM *f, const IWFS_FSM_OPTS *opts) {
1816 assert(f && opts);
1817 iwrc rc = 0;
1818 IWFS_EXT_STATE fstate = {0};
1819 const char *path = opts->exfile.file.path;
1820
1821 memset(f, 0, sizeof(*f));
1822 rc = iwfs_fsmfile_init();
1823 RCGO(rc, finish);
1824
1825 f->write = _fsm_write;
1826 f->read = _fsm_read;
1827 f->close = _fsm_close;
1828 f->sync = _fsm_sync;
1829 f->state = _fsm_state;
1830
1831 f->ensure_size = _fsm_ensure_size;
1832 f->add_mmap = _fsm_add_mmap;
1833 f->remap_all = _fsm_remap_all;
1834 f->acquire_mmap = _fsm_acquire_mmap;
1835 f->probe_mmap = _fsm_probe_mmap;
1836 f->release_mmap = _fsm_release_mmap;
1837 f->remove_mmap = _fsm_remove_mmap;
1838 f->sync_mmap = _fsm_sync_mmap;
1839
1840 f->allocate = _fsm_allocate;
1841 f->reallocate = _fsm_reallocate;
1842 f->deallocate = _fsm_deallocate;
1843 f->check_allocation_status = _fsm_check_allocation_status;
1844 f->writehdr = _fsm_writehdr;
1845 f->readhdr = _fsm_readhdr;
1846 f->clear = _fsm_clear;
1847 f->extfile = _fsm_extfile;
1848
1849 if (!path) {
1850 return IW_ERROR_INVALID_ARGS;
1851 }
1852 FSM *impl = f->impl = calloc(1, sizeof(*f->impl));
1853 if (!impl) {
1854 return iwrc_set_errno(IW_ERROR_ALLOC, errno);
1855 }
1856 impl->f = f;
1857 impl->dlsnr = opts->exfile.file.dlsnr; // Copy data changes listener address
1858 impl->mmap_opts = opts->mmap_opts;
1859
1860 IWFS_EXT_OPTS rwl_opts = opts->exfile;
1861 rwl_opts.use_locks = !(opts->oflags & IWFSM_NOLOCKS);
1862
1863 rc = _fsm_init_impl(impl, opts);
1864 RCGO(rc, finish);
1865
1866 rc = _fsm_init_locks(impl, opts);
1867 RCGO(rc, finish);
1868
1869 rc = iwfs_exfile_open(&impl->pool, &rwl_opts);
1870 RCGO(rc, finish);
1871
1872 rc = impl->pool.state(&impl->pool, &fstate);
1873 RCGO(rc, finish);
1874
1875 impl->omode = fstate.file.opts.omode;
1876
1877 if (fstate.file.ostatus & IWFS_OPEN_NEW) {
1878 rc = _fsm_init_new_lw(impl, opts);
1879 } else {
1880 rc = _fsm_init_existing_lw(impl);
1881 }
1882
1883 finish:
1884 if (rc) {
1885 if (f->impl) {
1886 IWRC(_fsm_destroy_locks(f->impl), rc); // we are not locked
1887 IWRC(_fsm_close(f), rc);
1888 }
1889 }
1890 return rc;
1891 }
1892
_fsmfile_ecodefn(locale_t locale,uint32_t ecode)1893 static const char *_fsmfile_ecodefn(locale_t locale, uint32_t ecode) {
1894 if (!(ecode > _IWFS_FSM_ERROR_START && ecode < _IWFS_FSM_ERROR_END)) {
1895 return 0;
1896 }
1897 switch (ecode) {
1898 case IWFS_ERROR_NO_FREE_SPACE:
1899 return "No free space. (IWFS_ERROR_NO_FREE_SPACE)";
1900 case IWFS_ERROR_INVALID_BLOCK_SIZE:
1901 return "Invalid block size specified. (IWFS_ERROR_INVALID_BLOCK_SIZE)";
1902 case IWFS_ERROR_RANGE_NOT_ALIGNED:
1903 return "Specified range/offset is not aligned with page/block. "
1904 "(IWFS_ERROR_RANGE_NOT_ALIGNED)";
1905 case IWFS_ERROR_FSM_SEGMENTATION:
1906 return "Free-space map segmentation error. (IWFS_ERROR_FSM_SEGMENTATION)";
1907 case IWFS_ERROR_INVALID_FILEMETA:
1908 return "Invalid file metadata. (IWFS_ERROR_INVALID_FILEMETA)";
1909 case IWFS_ERROR_PLATFORM_PAGE:
1910 return "The block size incompatible with platform page size, data "
1911 "migration required. (IWFS_ERROR_PLATFORM_PAGE)";
1912 case IWFS_ERROR_RESIZE_FAIL:
1913 return "Failed to resize file, "
1914 "conflicting with free-space map location (IWFS_ERROR_RESIZE_FAIL)";
1915 default:
1916 break;
1917 }
1918 return 0;
1919 }
1920
iwfs_fsmfile_init(void)1921 iwrc iwfs_fsmfile_init(void) {
1922 static int _fsmfile_initialized = 0;
1923 iwrc rc = iw_init();
1924 RCRET(rc);
1925 if (!__sync_bool_compare_and_swap(&_fsmfile_initialized, 0, 1)) {
1926 return 0; // initialized already
1927 }
1928 return iwlog_register_ecodefn(_fsmfile_ecodefn);
1929 }
1930
1931 /*************************************************************************************************
1932 * Debug API *
1933 *************************************************************************************************/
1934
iwfs_fsmdbg_number_of_free_areas(IWFS_FSM * f)1935 uint64_t iwfs_fsmdbg_number_of_free_areas(IWFS_FSM *f) {
1936 int ret;
1937 assert(f);
1938 FSM *impl = f->impl;
1939 _fsm_ctrl_rlock(impl);
1940 ret = kb_size(impl->fsm);
1941 _fsm_ctrl_unlock(impl);
1942 return (uint64_t) ret;
1943 }
1944
iwfs_fsmdbg_find_next_set_bit(const uint64_t * addr,uint64_t offset_bit,uint64_t max_offset_bit,int * found)1945 uint64_t iwfs_fsmdbg_find_next_set_bit(const uint64_t *addr, uint64_t offset_bit, uint64_t max_offset_bit,
1946 int *found) {
1947 return _fsm_find_next_set_bit(addr, offset_bit, max_offset_bit, found);
1948 }
1949
iwfs_fsmdbg_find_prev_set_bit(const uint64_t * addr,uint64_t offset_bit,uint64_t min_offset_bit,int * found)1950 uint64_t iwfs_fsmdbg_find_prev_set_bit(const uint64_t *addr, uint64_t offset_bit, uint64_t min_offset_bit,
1951 int *found) {
1952 return _fsm_find_prev_set_bit(addr, offset_bit, min_offset_bit, found);
1953 }
1954
iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM * f,const char * hdr)1955 void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr) {
1956 assert(f);
1957 FSM *impl = f->impl;
1958 fprintf(stderr, "FSM TREE: %s\n", hdr);
1959 if (!impl->fsm) {
1960 fprintf(stderr, "NONE\n");
1961 return;
1962 }
1963 #define _fsm_traverse(k) \
1964 { \
1965 uint64_t koff = FSMBK_OFFSET(k); \
1966 uint64_t klen = FSMBK_LENGTH(k); \
1967 fprintf(stderr, "[%" PRIu64 " %" PRIu64 "]\n", koff, klen); \
1968 }
1969 __kb_traverse(FSMBK, impl->fsm, _fsm_traverse);
1970 #undef _fsm_traverse
1971 }
1972
1973
byte_to_binary(int x)1974 const char *byte_to_binary(int x) {
1975 static char b[9];
1976 b[0] = '\0';
1977 int z;
1978 for (z = 1; z <= 128; z <<= 1) {
1979 strcat(b, ((x & z) == z) ? "1" : "0");
1980 }
1981 return b;
1982 }
1983
1984
iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM * f)1985 iwrc iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM *f) {
1986 assert(f);
1987 size_t sp;
1988 uint8_t *mm;
1989 FSM *impl = f->impl;
1990 iwrc rc;
1991 if (impl->mmap_all) {
1992 rc = impl->pool.probe_mmap(&impl->pool, 0, &mm, &sp);
1993 if (!rc) {
1994 if (sp <= impl->bmoff) {
1995 rc = IWFS_ERROR_NOT_MMAPED;
1996 } else {
1997 mm += impl->bmoff;
1998 sp = sp - impl->bmoff;
1999 }
2000 }
2001 } else {
2002 rc = impl->pool.probe_mmap(&impl->pool, impl->bmoff, &mm, &sp);
2003 }
2004 if (rc) {
2005 iwlog_ecode_error3(rc);
2006 return rc;
2007 }
2008 int i = ((impl->hdrlen >> impl->bpow) >> 3);
2009 // if (impl->bmoff == impl->aunit) {
2010 // i += ((impl->bmlen >> impl->bpow) >> 3);
2011 // }
2012 for (; i < sp && i < impl->bmlen; ++i) {
2013 uint8_t b = *(mm + i);
2014 fprintf(stderr, "%s", byte_to_binary(b));
2015 }
2016 printf("\n");
2017 return 0;
2018 }
2019
2020
iwfs_fsmdbg_state(IWFS_FSM * f,IWFS_FSMDBG_STATE * d)2021 iwrc iwfs_fsmdbg_state(IWFS_FSM *f, IWFS_FSMDBG_STATE *d) {
2022 FSM_ENSURE_OPEN2(f);
2023 FSM *impl = f->impl;
2024 iwrc rc = _fsm_ctrl_rlock(impl);
2025 memset(d, 0, sizeof(*d));
2026 IWRC(impl->pool.state(&impl->pool, &d->state.exfile), rc);
2027 d->state.block_size = 1U << impl->bpow;
2028 d->state.oflags = impl->oflags;
2029 d->state.hdrlen = impl->hdrlen;
2030 d->state.blocks_num = impl->bmlen << 3;
2031 d->state.free_segments_num = (uint64_t) kb_size(impl->fsm);
2032 d->state.avg_alloc_size = impl->crznum > 0 ? (double_t) impl->crzsum / (double_t) impl->crznum : 0;
2033 d->state.alloc_dispersion = impl->crznum > 0 ? (double_t) impl->crzvar / (double_t) impl->crznum : 0;
2034 d->bmoff = impl->bmoff;
2035 d->bmlen = impl->bmlen;
2036 d->lfbkoff = impl->lfbkoff;
2037 d->lfbklen = impl->lfbklen;
2038 IWRC(_fsm_ctrl_unlock(impl), rc);
2039 return rc;
2040 }
2041