1 /*
2 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
3 * Licensed under the Mulan PSL v2.
4 * You can use this software according to the terms and conditions of the Mulan PSL v2.
5 * You may obtain a copy of Mulan PSL v2 at:
6 * http://license.coscl.org.cn/MulanPSL2
7 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
8 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
9 * PURPOSE.
10 * See the Mulan PSL v2 for more details.
11 */
/*
 * This file contains some internal operations
 * that are closely related to the management of
 * some core data structures.
 *
 * These functions are mostly called through function pointers (with rare
 * exceptions), in the form of "methods" in OOP languages. Most of these
 * functions *are not* meant to be called directly!
 *
 * We have grouped these operations into four categories:
 * 1. base inode operations 2. regular file operations
 * 3. directory operations 4. symlink operations
 */
25
26 #include "chcore/container/hashtable.h"
27 #include "chcore/container/list.h"
28 #include "dirent.h"
29 #include "sys/stat.h"
30 #include "sys/mman.h"
31 #include <chcore/error.h>
32 #include <chcore/falloc.h>
33 #include <chcore/container/radix.h>
34 #include "defs.h"
35 #include <stddef.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <string.h>
40
41 struct inode *tmpfs_root = NULL;
42 struct dentry *tmpfs_root_dent = NULL;
43
44 struct base_inode_ops base_inode_ops;
45 struct regfile_ops regfile_ops;
46 struct dir_ops dir_ops;
47 struct symlink_ops symlink_ops;
48
#if DEBUG_MEM_USAGE
/*
 * Page deleter handed to the radix tree when memory accounting is enabled:
 * it drops the DATA_PAGE accounting record of the page and then unmaps it.
 */
void debug_radix_free(void *page)
{
        tmpfs_revoke_mem_usage(page, DATA_PAGE);
        munmap_page(page);
}
#endif /* DEBUG_MEM_USAGE */
56
munmap_page(void * page)57 void munmap_page(void *page) {
58 munmap(page, PAGE_SIZE);
59 }
60
61 /* string utils */
/**
 * @brief Multiplicative string hash: hash = hash * 131 + byte, per byte.
 * @param str The bytes to hash.
 * @param len Number of bytes to hash. Passing (size_t)-1 (what a caller
 * writing -1 ends up passing) hashes up to, but not including, the
 * terminating NUL instead.
 * @return u64 The hash value; 0 for an empty input.
 * @note len is unsigned, so the former `len < 0` test could never be true
 * and the NUL-terminated mode was unreachable; the sentinel restores it.
 */
u64 hash_chars(const char *str, size_t len)
{
        const u64 seed = 131; /* 31 131 1313 13131 131313 etc.. */
        u64 hash = 0;
        size_t i;

        if (len == (size_t)-1) {
                /* length unknown: stop at the terminating NUL */
                for (; *str; ++str)
                        hash = (hash * seed) + *str;
                return hash;
        }

        for (i = 0; i < len; ++i)
                hash = (hash * seed) + str[i];

        return hash;
}
80
hash_string(struct string * s)81 void hash_string(struct string *s)
82 {
83 s->hash = hash_chars(s->str, s->len);
84 }
85
init_string(struct string * s,const char * name,size_t len)86 int init_string(struct string *s, const char *name, size_t len)
87 {
88 free_string(s);
89
90 /*
91 * s->str is allocated and copied,
92 * remember to free it afterwards
93 */
94 s->str = malloc(len + 1);
95 if (!s->str) {
96 return -ENOMEM;
97 }
98
99 #if DEBUG_MEM_USAGE
100 tmpfs_record_mem_usage(s->str, len + 1, STRING);
101 #endif
102
103 memcpy(s->str, name, len);
104 s->str[len] = '\0';
105
106 s->len = len;
107
108 hash_string(s);
109 return 0;
110 }
111
free_string(struct string * s)112 void free_string(struct string *s)
113 {
114 if (s->str) {
115 #if DEBUG_MEM_USAGE
116 tmpfs_revoke_mem_usage(s->str, STRING);
117 #endif
118 free(s->str);
119 }
120 s->str = NULL;
121 }
122
123 /* FS operations */
124
125 /**
126 * @brief Get the fs's metadata.
127 * @return statbuf A pointer to the caller provided buff, filled with
128 * preset metadata of tmpfs.
129 */
tmpfs_fs_stat(struct statfs * statbuf)130 void tmpfs_fs_stat(struct statfs *statbuf)
131 {
132 const int faked_large_value = 1024 * 1024 * 512;
133 /* Type of filesystem (see below): ChCorE7mpf5 */
134 statbuf->f_type = 0xCCE7A9F5;
135 /* Optimal transfer block size */
136 statbuf->f_bsize = PAGE_SIZE;
137 /* Total data blocks in filesystem */
138 statbuf->f_blocks = faked_large_value;
139 /* Free blocks in filesystem */
140 statbuf->f_bfree = faked_large_value;
141 /* Free blocks available to unprivileged user */
142 statbuf->f_bavail = faked_large_value;
143 /* Total file nodes in filesystem */
144 statbuf->f_files = faked_large_value;
145 /* Free file nodes in filesystem */
146 statbuf->f_ffree = faked_large_value;
147 /* Filesystem ID (See man page) */
148 memset(&statbuf->f_fsid, 0, sizeof(statbuf->f_fsid));
149 /* Maximum length of filenames */
150 statbuf->f_namelen = MAX_PATH;
151 /* Fragment size (since Linux 2.6) */
152 statbuf->f_frsize = 512;
153 /* Mount flags of filesystem (since Linux 2.6.36) */
154 statbuf->f_flags = 0;
155 }
156
157 /* Base inode operations */
158
159 /**
160 * @brief Create an empty inode of a particular type.
161 * @param type The type of the inode to be created,
162 * can be one of FS_REG, FS_DIR, FS_SYM.
163 * @param mode The access mode of the file, ignored for now.
164 * @return inode The newly created inode, NULL if type is illegal or out of
165 * memory.
166 * @note The created inode is not ready for use. For example the directory is
167 * empty and has no dot and dotdot dentries.
168 */
tmpfs_inode_init(int type,mode_t mode)169 struct inode *tmpfs_inode_init(int type, mode_t mode)
170 {
171 struct inode *inode = malloc(sizeof(struct inode));
172
173 if (inode == NULL) {
174 return inode;
175 }
176
177 #if DEBUG_MEM_USAGE
178 tmpfs_record_mem_usage(inode, sizeof(struct inode), INODE);
179 #endif
180
181 /* Type-specific fields initialization */
182 switch (type) {
183 case FS_REG:
184 inode->f_ops = ®file_ops;
185
186 #if DEBUG_MEM_USAGE
187 init_radix_w_deleter(&inode->data, debug_radix_free);
188 #else
189 init_radix_w_deleter(&inode->data, munmap_page);
190 #endif
191 break;
192 case FS_DIR:
193 inode->d_ops = &dir_ops;
194 init_htable(&inode->dentries, MAX_DIR_HASH_BUCKETS);
195 break;
196 case FS_SYM:
197 inode->sym_ops = &symlink_ops;
198 inode->symlink = NULL;
199 break;
200 default:
201 #if DEBUG_MEM_USAGE
202 tmpfs_revoke_mem_usage(inode, INODE);
203 #endif
204
205 free(inode);
206 return NULL;
207 }
208
209 inode->type = type;
210 inode->nlinks = 0; /* ZERO links now */
211 inode->size = 0;
212 inode->mode = mode;
213 inode->opened = false;
214 inode->base_ops = &base_inode_ops;
215
216 return inode;
217 }
218
219 /**
220 * @brief Open a file.
221 * @param inode Inode to be opened.
222 * @note Tmpfs does not have anything like an openfile table, and does not to do
223 * reference counting on the opened files, they are all done in fs_base. The
224 * only thing we have to worry is that it is completely legal for a file to be
225 * unlinked when someone is still holding the open file handle and using it, and
226 * we can not free the inode at that time, so a flag indicates the file is
227 * opened is needed.
228 */
tmpfs_inode_open(struct inode * inode)229 static inline void tmpfs_inode_open(struct inode *inode)
230 {
231 inode->opened = true;
232 }
233
234 /**
235 * @brief Close a file. If the file has no link, free it.
236 * @param inode Inode to be closed
237 * @note As said above, when closing, the file may be unlinked before, we should
238 * clean it up then.
239 */
tmpfs_inode_close(struct inode * inode)240 static void tmpfs_inode_close(struct inode *inode)
241 {
242 #if DEBUG
243 BUG_ON(!inode->opened);
244 BUG_ON(inode->nlinks < 0);
245 #endif
246
247 inode->opened = false;
248
249 if (inode->nlinks == 0) {
250 inode->base_ops->free(inode);
251 }
252 }
253
254 /**
255 * @brief Increase the inode's nlinks by 1.
256 * @param inode
257 */
tmpfs_inode_inc_nlinks(struct inode * inode)258 static inline void tmpfs_inode_inc_nlinks(struct inode *inode)
259 {
260 #if DEBUG
261 BUG_ON(inode->nlinks < 0);
262 #endif
263 inode->nlinks++;
264 }
265
266 /**
267 * @brief Decrease the inode's nlinks by 1. If nlinks reaches
268 * zero and the inode is not being used, free it.
269 * @param inode
270 */
tmpfs_inode_dec_nlinks(struct inode * inode)271 static void tmpfs_inode_dec_nlinks(struct inode *inode)
272 {
273 #if DEBUG
274 BUG_ON(inode->nlinks <= 0);
275 #endif
276
277 inode->nlinks--;
278 if (inode->nlinks == 0 && !inode->opened) {
279 inode->base_ops->free(inode);
280 }
281 }
282
283 /**
284 * @brief Free an inode.
285 * @param inode Inode to be freed.
286 */
tmpfs_inode_free(struct inode * inode)287 static void tmpfs_inode_free(struct inode *inode)
288 {
289 /* freeing type-specific fields */
290 switch (inode->type) {
291 case FS_REG:
292 radix_free(&inode->data);
293 break;
294 case FS_DIR:
295 htable_free(&inode->dentries);
296 break;
297 case FS_SYM:
298 if (inode->symlink) {
299 #if DEBUG_MEM_USAGE
300 tmpfs_revoke_mem_usage(inode->symlink, SYMLINK);
301 #endif
302 free(inode->symlink);
303 }
304 break;
305 default:
306 BUG_ON(1);
307 }
308
309 #if DEBUG_MEM_USAGE
310 tmpfs_revoke_mem_usage(inode, INODE);
311 #endif
312 free(inode);
313 }
314
315 /**
316 * @brief Get an inode's metadata.
317 * @param inode The inode to stat.
318 * @return stat The pointer of the buffer to fill the stats in.
319 */
tmpfs_inode_stat(struct inode * inode,struct stat * stat)320 static void tmpfs_inode_stat(struct inode *inode, struct stat *stat)
321 {
322 memset(stat, 0, sizeof(struct stat));
323
324 /* We currently support only a small part of stat fields */
325 switch (inode->type) {
326 case FS_DIR:
327 stat->st_mode = S_IFDIR;
328 break;
329 case FS_REG:
330 stat->st_mode = S_IFREG;
331 break;
332 case FS_SYM:
333 stat->st_mode = S_IFLNK;
334 break;
335 default:
336 BUG_ON(1);
337 }
338
339 #if DEBUG
340 BUG_ON(inode->size >= LONG_MAX);
341 #endif
342
343 stat->st_size = (off_t)inode->size;
344 stat->st_nlink = inode->nlinks;
345 stat->st_ino = (ino_t)(uintptr_t)inode;
346 }
347
348 /* Regular file operations */
349
350 /**
351 * @brief read a file's content at offset, and read for size bytes.
352 * @param reg The file to be read.
353 * @param size Read at most size bytes.
354 * @param offset The starting offset of the read.
355 * @param buff The caller provided buffer to be filled with the file content.
356 * @return ssize_t The actual number of bytes that have been read into the
357 * buffer.
358 */
tmpfs_file_read(struct inode * reg,char * buff,size_t size,off_t offset)359 static ssize_t tmpfs_file_read(struct inode *reg, char *buff, size_t size,
360 off_t offset)
361 {
362 #if DEBUG
363 BUG_ON(reg->type != FS_REG);
364 #endif
365
366 u64 page_no, page_off;
367 u64 cur_off = offset;
368 size_t to_read;
369 void *page;
370
371 /* Returns 0 according to man pages. */
372 if (offset >= reg->size)
373 return 0;
374
375 size = MIN(reg->size - offset, size);
376
377 while (size > 0 && cur_off <= reg->size) {
378 page_no = cur_off / PAGE_SIZE;
379 page_off = cur_off % PAGE_SIZE;
380
381 page = radix_get(®->data, page_no);
382 to_read = MIN(size, PAGE_SIZE - page_off);
383 if (!page)
384 memset(buff, 0, to_read);
385 else
386 memcpy(buff, page + page_off, to_read);
387 cur_off += to_read;
388 buff += to_read;
389 size -= to_read;
390 }
391
392 return (ssize_t)(cur_off - offset);
393 }
394
395 /**
396 * @brief Write a file's content at offset, and write for size bytes.
397 * @param reg The file to be written.
398 * @param buff The caller provided buffer that is to be written into the file.
399 * @param size Write at most size bytes.
400 * @param offset The starting offset of the write.
401 * @return ssize_t The actual number of bytes that have been written.
402 */
tmpfs_file_write(struct inode * reg,const char * buff,size_t len,off_t offset)403 static ssize_t tmpfs_file_write(struct inode *reg, const char *buff, size_t len,
404 off_t offset)
405 {
406 #if DEBUG
407 BUG_ON(reg->type != FS_REG);
408 #endif
409
410 u64 page_no, page_off;
411 off_t cur_off = offset;
412 int to_write;
413 void *page = NULL;
414
415 if (len == 0)
416 return 0;
417
418 while (len > 0) {
419 page_no = cur_off / PAGE_SIZE;
420 page_off = cur_off % PAGE_SIZE;
421
422 page = radix_get(®->data, page_no);
423 if (!page) {
424 page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
425 if (!page)
426 return (ssize_t)(cur_off - offset);
427
428 #if DEBUG_MEM_USAGE
429 tmpfs_record_mem_usage(page, PAGE_SIZE, DATA_PAGE);
430 #endif
431
432 if (page_off)
433 memset(page, 0, page_off);
434 radix_add(®->data, page_no, page);
435 }
436
437 #if DEBUG
438 BUG_ON(page == NULL);
439 #endif
440
441 to_write = MIN(len, PAGE_SIZE - page_off);
442 memcpy(page + page_off, buff, to_write);
443 cur_off += to_write;
444 buff += to_write;
445 len -= to_write;
446 }
447
448 if (cur_off > reg->size) {
449 reg->size = cur_off;
450 if (cur_off % PAGE_SIZE && page) {
451 /* if the last write cannot fill the last page, set the
452 * remaining space to zero to ensure the correctness of
453 * the file_read */
454 page_off = cur_off % PAGE_SIZE;
455 memset(page + page_off, 0, PAGE_SIZE - page_off);
456 }
457 }
458 return (ssize_t)(cur_off - offset);
459 }
460
461 /**
462 * @brief Change a file's size to a length.
463 * @param reg The file to truncate.
464 * @param len The length to change to.
465 * @return 0 on success.
466 * @note When increasing the file's size, we only allocate the memory space in a
467 * lazy fashion. If do need the space to be allocated, consider functions
468 * related to fallocate().
469 */
tmpfs_file_truncate(struct inode * reg,off_t len)470 static int tmpfs_file_truncate(struct inode *reg, off_t len)
471 {
472 #if DEBUG
473 BUG_ON(reg->type != FS_REG);
474 #endif
475
476 u64 page_no, page_off;
477 void *page;
478 if (len == 0) {
479 /* free radix tree and init an empty one */
480 radix_free(®->data);
481
482 #if DEBUG_MEM_USAGE
483 init_radix_w_deleter(®->data, debug_radix_free);
484 #else
485 init_radix_w_deleter(®->data, munmap_page);
486 #endif
487 reg->size = 0;
488 } else if (len > reg->size) {
489 /* truncate should not allocate the space for the file */
490 reg->size = len;
491 } else if (len < reg->size) {
492 size_t cur_off = len;
493 size_t to_write;
494 page_no = cur_off / PAGE_SIZE;
495 page_off = cur_off % PAGE_SIZE;
496 if (page_off) {
497 /*
498 * if the last write cannot fill the last page, set the
499 * remaining space to zero to ensure the correctness of
500 * the file_read
501 */
502 page = radix_get(®->data, page_no);
503 if (page) {
504 to_write = MIN(reg->size - len, PAGE_SIZE - page_off);
505 memset(page + page_off, 0, to_write);
506 cur_off += to_write;
507 }
508 }
509 while (cur_off < reg->size) {
510 page_no = cur_off / PAGE_SIZE;
511 radix_del(®->data, page_no, 1);
512 cur_off += PAGE_SIZE;
513 }
514 reg->size = len;
515 }
516
517 return 0;
518 }
519
520 /**
521 * @brief Deallocate disk space(memory in tmpfs) in the range specified by
522 * params offset to offset + len for a file.
523 * @param reg The file to modify.
524 * @param offset The start of the deallocate range.
525 * @param len The length of the deallocate range.
526 * @return int 0 on success, -1 if radix_del() fails.
527 * @note For full pages in the range, they are deleted from the file. For
528 * partial pages in the range, they are set to zero.
529 */
tmpfs_file_punch_hole(struct inode * reg,off_t offset,off_t len)530 static int tmpfs_file_punch_hole(struct inode *reg, off_t offset, off_t len)
531 {
532 #if DEBUG
533 BUG_ON(reg->type != FS_REG);
534 #endif
535
536 u64 page_no, page_off;
537 u64 cur_off = offset;
538 off_t to_remove;
539 void *page;
540 int err;
541
542 while (len > 0) {
543 page_no = cur_off / PAGE_SIZE;
544 page_off = cur_off % PAGE_SIZE;
545
546 to_remove = MIN(len, PAGE_SIZE - page_off);
547 cur_off += to_remove;
548 len -= to_remove;
549 page = radix_get(®->data, page_no);
550 if (page) {
551 /*
552 * Linux Manpage:
553 * Within the specified range, partial filesystem blocks
554 * are zeroed, and whole filesystem blocks are removed
555 * from the file.
556 */
557 if (to_remove == PAGE_SIZE || cur_off == reg->size) {
558 err = radix_del(®->data, page_no, 1);
559 /* if no err, just continue! */
560 if (err) {
561 return err;
562 }
563 } else {
564 memset(page + page_off, 0, to_remove);
565 }
566 }
567 }
568 return 0;
569 }
570
571 /**
572 * @brief Collapse file space in the range from offset to offset + len.
573 * @param reg The file to be collapsed.
574 * @param offset The start of the to be collapsed range.
575 * @param len The length of the range.
576 * @return int 0 on success, -EINVAL if the granularity not match or the params
577 are illegal, or -1 if radix_del() fails, or -ENOMEM if radix_add() fails.
578 * @note The range will be *removed*, without leaving a hole in the file. That
579 is, the content start at offset + len will be at offset when this is done, and
580 the file size will be len bytes smaller. Tmpfs only allows this to be done at a
581 page granularity.
582 */
tmpfs_file_collapse_range(struct inode * reg,off_t offset,off_t len)583 static int tmpfs_file_collapse_range(struct inode *reg, off_t offset, off_t len)
584 {
585 #if DEBUG
586 BUG_ON(reg->type != FS_REG);
587 #endif
588
589 u64 page_no1, page_no2;
590 u64 cur_off = offset;
591 void *page1;
592 void *page2;
593 u64 remain;
594 int err;
595 off_t dist;
596
597 /* To ensure efficient implementation, offset and len must be a mutiple
598 * of the filesystem logical block size */
599 if (offset % PAGE_SIZE || len % PAGE_SIZE)
600 return -EINVAL;
601 if (offset + len >= reg->size)
602 return -EINVAL;
603
604 remain = ((reg->size + PAGE_SIZE - 1) - (offset + len)) / PAGE_SIZE;
605 dist = len / PAGE_SIZE;
606 while (remain-- > 0) {
607 page_no1 = cur_off / PAGE_SIZE;
608 page_no2 = page_no1 + dist;
609
610 cur_off += PAGE_SIZE;
611 page1 = radix_get(®->data, page_no1);
612 page2 = radix_get(®->data, page_no2);
613 if (page1) {
614 err = radix_del(®->data, page_no1, 1);
615 if (err)
616 goto error;
617 }
618 if (page2) {
619 err = radix_add(®->data, page_no1, page2);
620 if (err)
621 goto error;
622 err = radix_del(®->data, page_no2, 0);
623 if (err)
624 goto error;
625 }
626 }
627
628 reg->size -= len;
629 return 0;
630
631 error:
632 error("Error in collapse range!\n");
633 return err;
634 }
635
636 /**
637 * @brief Zero the given range of a file.
638 * @param reg The file to modify.
639 * @param offset The start of the range.
640 * @param len The length of the range.
641 * @param mode The allocate mode of this call.
642 * @return 0 on success, -ENOSPC when out of disk(memory) space.
643 * @note The range will be set to zero, missing pages will be allocated. If
644 * FALLOC_FL_KEEP_SIZE is set in mode, the file size will not be changed.
645 */
tmpfs_file_zero_range(struct inode * reg,off_t offset,off_t len,mode_t mode)646 static int tmpfs_file_zero_range(struct inode *reg, off_t offset, off_t len,
647 mode_t mode)
648 {
649 #if DEBUG
650 BUG_ON(reg->type != FS_REG);
651 #endif
652
653 u64 page_no, page_off;
654 u64 cur_off = offset;
655 off_t length = len;
656 off_t to_zero;
657 void *page;
658
659 while (len > 0) {
660 page_no = cur_off / PAGE_SIZE;
661 page_off = cur_off % PAGE_SIZE;
662
663 to_zero = MIN(len, PAGE_SIZE - page_off);
664 cur_off += to_zero;
665 len -= to_zero;
666 if (!len)
667 to_zero = PAGE_SIZE;
668 page = radix_get(®->data, page_no);
669 if (!page) {
670 page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
671 if (!page)
672 return -ENOSPC;
673 #if DEBUG_MEM_USAGE
674 tmpfs_record_mem_usage(page, PAGE_SIZE, DATA_PAGE);
675 #endif
676 radix_add(®->data, page_no, page);
677 }
678
679 #if DEBUG
680 BUG_ON(!page);
681 #endif
682
683 memset(page + page_off, 0, to_zero);
684 }
685
686 if ((!(mode & FALLOC_FL_KEEP_SIZE)) && (offset + length > reg->size))
687 reg->size = offset + length;
688
689 return 0;
690 }
691
692 /**
693 * @brief Increasing a file's space.
694 * @param reg The file to modify.
695 * @param offset The start of the range.
696 * @param len The length of the range.
697 * @return int 0 on success, -EINVAL if the granularity not match or the params
698 are illegal, or -1 if radix_del() fails, or -ENOMEM if radix_add() fails.
699 * @note The operation inserts spaces at offset with length len, that is: the
700 content starting at the offset will be moved to offset + len, and the size of
701 the file increased by len. Tmpfs only allows this to happen at page
702 granularity.
703 */
tmpfs_file_insert_range(struct inode * reg,off_t offset,off_t len)704 static int tmpfs_file_insert_range(struct inode *reg, off_t offset, off_t len)
705 {
706 #if DEBUG
707 BUG_ON(reg->type != FS_REG);
708 #endif
709
710 u64 page_no1, page_no2;
711 void *page;
712 int err;
713 off_t dist;
714
715 /* To ensure efficient implementation, this mode has the same
716 * limitations as FALLOC_FL_COLLAPSE_RANGE regarding the granularity of
717 * the operation. (offset and len must be a mutiple of the filesystem
718 * logical block size) */
719 if (offset % PAGE_SIZE || len % PAGE_SIZE)
720 return -EINVAL;
721 /* If the offset is equal to or greater than the EOF, an error is
722 * returned. For such operations, ftruncate should be used. */
723 if (offset >= reg->size)
724 return -EINVAL;
725
726 page_no1 = (reg->size + PAGE_SIZE - 1) / PAGE_SIZE;
727 dist = len / PAGE_SIZE;
728 while (page_no1 >= offset / PAGE_SIZE) {
729 page_no2 = page_no1 + dist;
730 #if DEBUG
731 BUG_ON(radix_get(®->data, page_no2));
732 #endif
733 page = radix_get(®->data, page_no1);
734 if (page) {
735 err = radix_del(®->data, page_no1, 0);
736 if (err)
737 goto error;
738 err = radix_add(®->data, page_no2, page);
739 if (err)
740 goto error;
741 }
742 page_no1--;
743 }
744
745 reg->size += len;
746 return 0;
747
748 error:
749 error("Error in insert range!\n");
750 return err;
751 }
752
753 /**
754 * @brief Allocate disk space(memory in tmpfs) for the file, from offset to
755 * offset + len, and zero any newly allocated memory space. This ensures later
756 * operations within the allocated range will not fail because of lack of memory
757 * space.
758 * @param reg The file to operate with.
759 * @param offset The operation starts at offset.
760 * @param len The length of the allocated range.
761 * @param keep_size If keep_size is set, the file size will not be changed.
762 * Otherwise, if offset + len > reg->size, reg->size will be changed to offset +
763 * len.
764 * @return int 0 on success, -ENOSPC if not enough disk(memory in tmpfs) space.
765 */
tmpfs_file_allocate(struct inode * reg,off_t offset,off_t len,int keep_size)766 static int tmpfs_file_allocate(struct inode *reg, off_t offset, off_t len,
767 int keep_size)
768 {
769 #if DEBUG
770 BUG_ON(reg->type != FS_REG);
771 #endif
772
773 u64 page_no;
774 u64 cur_off = offset;
775 void *page;
776
777 while (cur_off < offset + len) {
778 page_no = cur_off / PAGE_SIZE;
779
780 page = radix_get(®->data, page_no);
781 if (!page) {
782 page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
783 if (!page)
784 return -ENOSPC;
785 #if DEBUG_MEM_USAGE
786 tmpfs_record_mem_usage(page, PAGE_SIZE, DATA_PAGE);
787 #endif
788
789 if (radix_add(®->data, page_no, page)) {
790 #if DEBUG_MEM_USAGE
791 tmpfs_revoke_mem_usage(page, DATA_PAGE);
792 #endif
793 munmap_page(page);
794 return -ENOSPC;
795 }
796 memset(page, 0, PAGE_SIZE);
797 }
798 cur_off += PAGE_SIZE;
799 }
800
801 if (offset + len > reg->size && !keep_size) {
802 reg->size = offset + len;
803 }
804 return 0;
805 }
806
807 /* Directory operations */
808
809 /**
810 * @brief Allocate a dentry, and initialize it with the parent dentry.
811 * @param d_parent The parent dentry of the to-be-allocated dentry.
812 * @return The newly created dentry
813 */
tmpfs_alloc_dent()814 static struct dentry *tmpfs_alloc_dent()
815 {
816 struct dentry *dentry = malloc(sizeof(struct dentry));
817 if (!dentry) {
818 return CHCORE_ERR_PTR(-ENOMEM);
819 }
820
821 #if DEBUG_MEM_USAGE
822 tmpfs_record_mem_usage(dentry, sizeof(struct dentry), DENTRY);
823 #endif
824
825 dentry->inode = NULL;
826 dentry->name.str = NULL;
827 return dentry;
828 }
829
830 /**
831 * @brief Free a dentry and its name string.
832 * @param inode The inode which does this operation, and should be the
833 * to-be-freed dentry's parent directory.
834 * @param dentry The dentry that is to be freed.
835 */
tmpfs_free_dent(struct dentry * dentry)836 static void tmpfs_free_dent(struct dentry *dentry)
837 {
838 free_string(&dentry->name);
839
840 #if DEBUG_MEM_USAGE
841 tmpfs_revoke_mem_usage(dentry, DENTRY);
842 #endif
843 free(dentry);
844 }
845
846 /**
847 * @brief Add a just allocated dentry into a directory, and adjust the dir's
848 * size.
849 * @param dir The directory to add the new dentry.
850 * @param new_dent The newly created dentry that will be added to the dir.
851 * @param name The name of the dentry.
852 * @param len The length of the name.
853 * @return int 0 for success, -ENOMEM if init_string() fails.
854 * @note This function does not create the new dentry, it presuppose a created
855 * dentry.
856 */
tmpfs_dir_add_dent(struct inode * dir,struct dentry * new_dent,char * name,size_t len)857 static int tmpfs_dir_add_dent(struct inode *dir, struct dentry *new_dent,
858 char *name, size_t len)
859 {
860 #if DEBUG
861 BUG_ON(dir->type != FS_DIR);
862 #endif
863
864 int err;
865
866 err = init_string(&new_dent->name, name, len); /* copy the str */
867 if (err) {
868 return err;
869 }
870
871 htable_add(&dir->dentries, (u32)(new_dent->name.hash), &new_dent->node);
872 dir->size += DENT_SIZE;
873
874 return 0;
875 }
876
877 /**
878 * @brief Remove a dentry from the dir's htable, adjust the size.
879 * @param dir the directory to remove the dentry from.
880 * @param dentry the dentry to be removed.
881 * @note This operation only operate on the directory, *dentry is not freed
882 */
tmpfs_dir_remove_dent(struct inode * dir,struct dentry * dentry)883 static void tmpfs_dir_remove_dent(struct inode *dir, struct dentry *dentry)
884 {
885 #if DEBUG
886 BUG_ON(dir->type != FS_DIR);
887 #endif
888
889 htable_del(&dentry->node);
890 dir->size -= DENT_SIZE;
891 /* not freeing the dentry now */
892 }
893
894 /**
895 * @brief Check if the directory is an empty and legal one.
896 * @param dir The directory to be checked.
897 * @return bool true for empty and legal, false otherwise.
898 * @note By saying empty and legal, we mean the directory should *have and only
899 * have* the dot dentry and the dotdot dentry in its dentries table.
900 */
tmpfs_dir_empty(struct inode * dir)901 static bool tmpfs_dir_empty(struct inode *dir)
902 {
903 #if DEBUG
904 BUG_ON(dir->type != FS_DIR);
905 #endif
906
907 struct dentry *iter;
908 int i = 0;
909 int ret;
910
911 bool found_dot = false, found_dotdot = false;
912
913 for_each_in_htable (iter, i, node, &dir->dentries) {
914 if ((ret = strcmp(iter->name.str, ".")) != 0
915 && strcmp(iter->name.str, "..") != 0) {
916 return false;
917 }
918
919 if (ret == 0) {
920 found_dot = true;
921 } else {
922 found_dotdot = true;
923 }
924 }
925
926 return found_dot && found_dotdot;
927 }
928
929 /**
930 * @brief Link the dentry with the inode, and do nothing else.
931 * @param dir the parent directory which the dentry belongs to.
932 * @param dentry The dentry, which has already been added to the directory, to
933 * hold the inode.
934 * @param inode The inode to be linked with the dentry.
935 */
tmpfs_dir_link(struct inode * dir,struct dentry * dentry,struct inode * inode)936 static void tmpfs_dir_link(struct inode *dir, struct dentry *dentry,
937 struct inode *inode)
938 {
939 #if DEBUG
940 BUG_ON(dir->type != FS_DIR);
941 #endif
942
943 dentry->inode = inode;
944 inode->base_ops->inc_nlinks(inode);
945 }
946
947 /**
948 * @brief Unlink the dentry with its inode, and also remove the dentry from the
949 * directory and free the dentry.
950 * @param dir The parent directory which has the dentry in it.
951 * @param dentry The dentry to be unlinked.
952 */
tmpfs_dir_unlink(struct inode * dir,struct dentry * dentry)953 static void tmpfs_dir_unlink(struct inode *dir, struct dentry *dentry)
954 {
955 #if DEBUG
956 BUG_ON(dir->type != FS_DIR);
957 BUG_ON(dentry->inode == NULL);
958 #endif
959
960 struct inode *inode = dentry->inode;
961
962 dir->d_ops->remove_dentry(dir, dentry);
963 dir->d_ops->free_dentry(dentry);
964 inode->base_ops->dec_nlinks(inode);
965 }
966
967 /**
968 * @brief Make a new directory under a parent dir.
969 * @param dir The parent dir to hold the new directory.
970 * @param dentry The dentry of the new directory.
971 * @param mode The access mode of the dir, ignored.
972 * @return int 0 on success, -ENOMEM on failure.
973 * @return inode Upon success, a new directory inode is created and can be
974 * accessed by dentry->inode.
975 */
tmpfs_dir_mkdir(struct inode * dir,struct dentry * dentry,mode_t mode)976 static int tmpfs_dir_mkdir(struct inode *dir, struct dentry *dentry,
977 mode_t mode)
978 {
979 #if DEBUG
980 BUG_ON(dir->type != FS_DIR);
981 #endif
982
983 int err = dir->d_ops->mknod(dir, dentry, mode, FS_DIR);
984 if (err) {
985 return err;
986 }
987
988 struct inode *new_dir = dentry->inode;
989
990 struct dentry *dot = new_dir->d_ops->alloc_dentry();
991 if (CHCORE_IS_ERR(dot)) {
992 err = CHCORE_PTR_ERR(dot);
993 goto free_node;
994 }
995
996 err = new_dir->d_ops->add_dentry(new_dir, dot, ".", 1);
997 if (err) {
998 goto free_dot;
999 }
1000
1001 new_dir->d_ops->link(new_dir, dot, new_dir);
1002
1003 struct dentry *dotdot = new_dir->d_ops->alloc_dentry();
1004 if (CHCORE_IS_ERR(dotdot)) {
1005 err = CHCORE_PTR_ERR(dotdot);
1006 goto remove_dot;
1007 }
1008
1009 err = new_dir->d_ops->add_dentry(new_dir, dotdot, "..", 2);
1010 if (err) {
1011 goto free_dotdot;
1012 }
1013
1014 new_dir->d_ops->link(new_dir, dotdot, dir);
1015
1016 return 0;
1017
1018 free_dotdot:
1019 new_dir->d_ops->free_dentry(dotdot);
1020 remove_dot:
1021 new_dir->d_ops->remove_dentry(new_dir, dot);
1022 free_dot:
1023 new_dir->d_ops->free_dentry(dot);
1024 free_node:
1025 dentry->inode->base_ops->free(dentry->inode);
1026 /* the caller-allocated dentry is not freed */
1027 dentry->inode = NULL;
1028
1029 return err;
1030 }
1031
1032 /**
1033 * @brief Remove an empty directory and its dentry under the parent directory.
1034 * @param dir The parent directory which holds the to-be-removed directory.
1035 * @param dentry The dentry of the directory to remove.
1036 * @return int 0 on success, -ENOTEMPTY if the to-be-removed directory is not
1037 * empty.
1038 */
tmpfs_dir_rmdir(struct inode * dir,struct dentry * dentry)1039 static int tmpfs_dir_rmdir(struct inode *dir, struct dentry *dentry)
1040 {
1041 #if DEBUG
1042 BUG_ON(dir->type != FS_DIR);
1043 BUG_ON(dentry->inode->type != FS_DIR);
1044 #endif
1045
1046 struct inode *to_remove = dentry->inode;
1047
1048 if (!to_remove->d_ops->is_empty(to_remove)) {
1049 return -ENOTEMPTY;
1050 }
1051
1052 struct dentry *dot = to_remove->d_ops->dirlookup(to_remove, ".", 1);
1053 struct dentry *dotdot = to_remove->d_ops->dirlookup(to_remove, "..", 2);
1054
1055 #if DEBUG
1056 BUG_ON(dot == NULL || dotdot == NULL);
1057 #endif
1058
1059 to_remove->d_ops->unlink(to_remove, dot);
1060 to_remove->d_ops->unlink(to_remove, dotdot);
1061
1062 dir->d_ops->unlink(dir, dentry);
1063
1064 return 0;
1065 }
1066
1067 /*
1068 * All illegal cases have been handled outside
1069 * Call to this function is guaranteed to be legal and will succeed
1070 */
1071 /**
1072 * @brief Rename an inode by reclaiming its dentry.
1073 * @param old_dir The parent directory where the inode was under.
1074 * @param old_dentry The dentry for the inode under old_dir.
1075 * @param new_dir The parent directory where the inode is moving to.
1076 * @param new_dentry The dentry that the inode is moving to. Should be added to
1077 * the new_dir already.
1078 * @note The rename() system call is very different with this simple function
1079 * because of all the corner cases and checks. This operation is simple because
1080 * it presupposes all the checks have been done outside before the call to this
1081 * function so it can only handle the legal case and is guaranteed to succeed.
1082 */
tmpfs_dir_rename(struct inode * old_dir,struct dentry * old_dentry,struct inode * new_dir,struct dentry * new_dentry)1083 static void tmpfs_dir_rename(struct inode *old_dir, struct dentry *old_dentry,
1084 struct inode *new_dir, struct dentry *new_dentry)
1085 {
1086 #if DEBUG
1087 BUG_ON(new_dir->type != FS_DIR);
1088 BUG_ON(old_dir->type != FS_DIR);
1089 #endif
1090
1091 /* we just link the new_dentry to the inode and unlink the old_dentry */
1092 struct inode *inode = old_dentry->inode;
1093
1094 new_dir->d_ops->link(new_dir, new_dentry, inode); /* link first */
1095 old_dir->d_ops->unlink(old_dir, old_dentry);
1096
1097 if (inode->type == FS_DIR) {
1098 struct dentry *dotdot = inode->d_ops->dirlookup(inode, "..", 2);
1099
1100 #if DEBUG
1101 struct dentry *dot = inode->d_ops->dirlookup(inode, ".", 1);
1102 BUG_ON(!dot);
1103 BUG_ON(!dotdot);
1104 BUG_ON(dot->inode != inode);
1105 BUG_ON(dotdot->inode != old_dir);
1106 #endif
1107
1108 dotdot->inode = new_dir;
1109
1110 /* dotdot is changed */
1111 old_dir->base_ops->dec_nlinks(old_dir);
1112 new_dir->base_ops->inc_nlinks(new_dir);
1113 }
1114 }
1115
1116 /**
1117 * @brief Make a new inode under the parent directory.
1118 * @param dir The parent directory to hold the newly created inode.
1119 * @param dentry The dentry for the new inode. It should be already allocated
1120 * and added into the dir.
1121 * @param mode The access mode of this new inode. Ignored.
1122 * @param type The type of the inode. FS_REG, FS_DIR, FS_SYM can be used.
1123 * @return int 0 on success, -ENOMEM if the type is wrong or no enough memory.
1124 * @return inode On success, the newly created inode is returned and can be
1125 * accessed in dentry->inode.
1126 */
tmpfs_dir_mknod(struct inode * dir,struct dentry * dentry,mode_t mode,int type)1127 static int tmpfs_dir_mknod(struct inode *dir, struct dentry *dentry,
1128 mode_t mode, int type)
1129 {
1130 #if DEBUG
1131 BUG_ON(dir->type != FS_DIR);
1132 #endif
1133
1134 struct inode *inode = tmpfs_inode_init(type, mode);
1135 if (inode == NULL) {
1136 return -ENOMEM;
1137 }
1138
1139 /* linking the inode and the dentry */
1140 dir->d_ops->link(dir, dentry, inode);
1141
1142 return 0;
1143 }
1144
1145 /**
1146 * @brief Lookup a given dentry name under the directory.
1147 * @param dir The directory to lookup.
1148 * @param name The name of the dentry to find.
1149 * @param len The length of the dentry name.
1150 * @return dentry NULL if not found, a pointer to the dentry if found.
1151 */
tmpfs_dirlookup(struct inode * dir,const char * name,size_t len)1152 static struct dentry *tmpfs_dirlookup(struct inode *dir, const char *name,
1153 size_t len)
1154 {
1155 #if DEBUG
1156 BUG_ON(dir->type != FS_DIR);
1157 #endif
1158 u64 hash;
1159 struct hlist_head *head;
1160 struct dentry *iter;
1161
1162 hash = hash_chars(name, len);
1163
1164 head = htable_get_bucket(&dir->dentries, (u32)hash);
1165 for_each_in_hlist (iter, node, head) {
1166 if (iter->name.len == len && strncmp(iter->name.str, name, len) == 0) {
1167 return iter;
1168 }
1169 }
1170
1171 return NULL;
1172 }
1173
1174 #define DIRENT_NAME_MAX 256
1175
1176 /**
1177 * @brief Fill a dirent with some metadata.
1178 * @param dirpp The pointer of pointer of the dirent struct.
1179 * @param end The end of the array of dirent.
1180 * @param name The name of the dirent.
1181 * @param off The offset of the dirent in the directory.
1182 * @param type The type of the dirent.
1183 * @param ino The size of the inode.
1184 * @return int The length of the written data.
1185 */
tmpfs_dir_fill_dirent(void ** dirpp,void * end,char * name,off_t off,unsigned char type,ino_t ino)1186 static int tmpfs_dir_fill_dirent(void **dirpp, void *end, char *name, off_t off,
1187 unsigned char type, ino_t ino)
1188 {
1189 struct dirent *dirp = *(struct dirent **)dirpp;
1190 void *p = dirp;
1191 unsigned short len = sizeof(struct dirent);
1192 p += len;
1193 if (p > end)
1194 return -EAGAIN;
1195 dirp->d_ino = ino;
1196 dirp->d_off = off;
1197 dirp->d_reclen = len;
1198 dirp->d_type = type;
1199 strlcpy(dirp->d_name, name, DIRENT_NAME_MAX);
1200 *dirpp = p;
1201 return len;
1202 }
1203
1204 /**
1205 * @brief Scan a directory's dentries and write them into a buffer.
1206 * @param dir The directory to scan.
1207 * @param start The index in directory's dentry table to scan with.
1208 * @param buf The caller provided buffer of the dirent array.
1209 * @param end The end of the dirent array.
1210 * @return int The number of dentries scanned.
1211 * @return read_bytes The number of bytes written to the buffer.
1212 */
tmpfs_dir_scan(struct inode * dir,unsigned int start,void * buf,void * end,int * read_bytes)1213 static int tmpfs_dir_scan(struct inode *dir, unsigned int start, void *buf,
1214 void *end, int *read_bytes)
1215 {
1216 #if DEBUG
1217 BUG_ON(dir->type != FS_DIR);
1218 #endif
1219
1220 int cnt = 0, b, ret;
1221 ino_t ino;
1222 void *p = buf;
1223 unsigned char type;
1224 struct dentry *iter;
1225
1226 for_each_in_htable (iter, b, node, &dir->dentries) {
1227 if (cnt >= start) {
1228 type = iter->inode->type;
1229 ino = iter->inode->size;
1230
1231 ret =
1232 tmpfs_dir_fill_dirent(&p, end, iter->name.str, cnt, type, ino);
1233
1234 if (ret <= 0) {
1235 if (read_bytes) {
1236 *read_bytes = (int)(p - buf);
1237 }
1238 return (int)(cnt - start);
1239 }
1240 }
1241 cnt++;
1242 }
1243
1244 if (read_bytes) {
1245 *read_bytes = (int)(p - buf);
1246 }
1247 return (int)(cnt - start);
1248 }
1249
1250 /* Symlink operations */
1251
1252 /**
1253 * @brief Read the content of a symlink
1254 * @param symlink The symlink.
1255 * @param buff The buffer to read the symlink into.
1256 * @param len The length of the buffer
1257 * @return ssize_t The actual bytes read into the buffer.
1258 */
tmpfs_symlink_read(struct inode * symlink,char * buff,size_t len)1259 static ssize_t tmpfs_symlink_read(struct inode *symlink, char *buff, size_t len)
1260 {
1261 #if DEBUG
1262 BUG_ON(symlink->type != FS_SYM);
1263 #endif
1264
1265 len = len < symlink->size ? len : symlink->size;
1266 memcpy(buff, symlink->symlink, len);
1267 return (ssize_t)len;
1268 }
1269
1270 /**
1271 * @brief Write a symlink.
1272 * @param symlink The symlink.
1273 * @param buff The buffer of the content to be written.
1274 * @param len The length of the symlink to be written.
1275 * @return ssize_t The length of the link successfully written, or -ENAMETOOLONG
1276 * if the link is too long, or -ENOMEM if there is no enough memory for the link
1277 */
tmpfs_symlink_write(struct inode * symlink,const char * buff,size_t len)1278 static ssize_t tmpfs_symlink_write(struct inode *symlink, const char *buff,
1279 size_t len)
1280 {
1281 #if DEBUG
1282 BUG_ON(symlink->type != FS_SYM);
1283 #endif
1284
1285 if (len > MAX_SYM_LEN) {
1286 return -ENAMETOOLONG;
1287 }
1288
1289 if (symlink->symlink) {
1290 #if DEBUG_MEM_USAGE
1291 tmpfs_revoke_mem_usage(symlink->symlink, SYMLINK);
1292 #endif
1293 free(symlink->symlink);
1294 }
1295
1296 symlink->symlink = malloc(len + 1);
1297 if (!symlink->symlink) {
1298 return -ENOMEM;
1299 }
1300 #if DEBUG_MEM_USAGE
1301 tmpfs_record_mem_usage(symlink->symlink, len + 1, SYMLINK);
1302 #endif
1303
1304 memcpy(symlink->symlink, buff, len);
1305 symlink->symlink[len] = '\0';
1306
1307 symlink->size = (off_t)len;
1308
1309 return (ssize_t)len;
1310 }
1311
1312 struct base_inode_ops base_inode_ops = {
1313 .open = tmpfs_inode_open,
1314 .close = tmpfs_inode_close,
1315 .inc_nlinks = tmpfs_inode_inc_nlinks,
1316 .dec_nlinks = tmpfs_inode_dec_nlinks,
1317 .free = tmpfs_inode_free,
1318 .stat = tmpfs_inode_stat,
1319 };
1320
1321 struct regfile_ops regfile_ops = {
1322 .read = tmpfs_file_read,
1323 .write = tmpfs_file_write,
1324 .truncate = tmpfs_file_truncate,
1325 .punch_hole = tmpfs_file_punch_hole,
1326 .collapse_range = tmpfs_file_collapse_range,
1327 .zero_range = tmpfs_file_zero_range,
1328 .insert_range = tmpfs_file_insert_range,
1329 .allocate = tmpfs_file_allocate,
1330 };
1331
1332 struct dir_ops dir_ops = {
1333 .alloc_dentry = tmpfs_alloc_dent,
1334 .free_dentry = tmpfs_free_dent,
1335 .add_dentry = tmpfs_dir_add_dent,
1336 .remove_dentry = tmpfs_dir_remove_dent,
1337 .is_empty = tmpfs_dir_empty,
1338 .dirlookup = tmpfs_dirlookup,
1339 .mknod = tmpfs_dir_mknod,
1340 .link = tmpfs_dir_link,
1341 .unlink = tmpfs_dir_unlink,
1342 .mkdir = tmpfs_dir_mkdir,
1343 .rmdir = tmpfs_dir_rmdir,
1344 .rename = tmpfs_dir_rename,
1345 .scan = tmpfs_dir_scan,
1346 };
1347
1348 struct symlink_ops symlink_ops = {
1349 .read_link = tmpfs_symlink_read,
1350 .write_link = tmpfs_symlink_write,
1351 };
1352