• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
3  * Licensed under the Mulan PSL v2.
4  * You can use this software according to the terms and conditions of the Mulan PSL v2.
5  * You may obtain a copy of Mulan PSL v2 at:
6  *     http://license.coscl.org.cn/MulanPSL2
7  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
8  * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
9  * PURPOSE.
10  * See the Mulan PSL v2 for more details.
11  */
/*
 * This file contains some internal operations
 * that are closely related to the management of
 * some core data structures.
 *
 * These functions are mostly called through function pointers (with rare
 * exceptions), in the form of "methods" in OOP languages. Most of these
 * functions *are not* meant to be called directly!
 *
 * We have grouped these operations into four categories:
 * 1. base inode operations 2. regular file operations
 * 3. directory operations  4. symlink operations
 */
25 
26 #include "chcore/container/hashtable.h"
27 #include "chcore/container/list.h"
28 #include "dirent.h"
29 #include "sys/stat.h"
30 #include "sys/mman.h"
31 #include <chcore/error.h>
32 #include <chcore/falloc.h>
33 #include <chcore/container/radix.h>
34 #include "defs.h"
35 #include <stddef.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <string.h>
40 
41 struct inode *tmpfs_root = NULL;
42 struct dentry *tmpfs_root_dent = NULL;
43 
44 struct base_inode_ops base_inode_ops;
45 struct regfile_ops regfile_ops;
46 struct dir_ops dir_ops;
47 struct symlink_ops symlink_ops;
48 
#if DEBUG_MEM_USAGE
/*
 * Radix-tree deleter used when memory accounting is compiled in: drop the
 * DATA_PAGE accounting record for the page, then unmap it.
 */
void debug_radix_free(void *page)
{
    tmpfs_revoke_mem_usage(page, DATA_PAGE);
    /* Same effect as munmap_page(page). */
    munmap(page, PAGE_SIZE);
}
#endif
56 
munmap_page(void * page)57 void munmap_page(void *page) {
58         munmap(page, PAGE_SIZE);
59 }
60 
61 /* string utils */
hash_chars(const char * str,size_t len)62 u64 hash_chars(const char *str, size_t len)
63 {
64     u64 seed = 131; /* 31 131 1313 13131 131313 etc.. */
65     u64 hash = 0;
66     int i;
67 
68     if (len < 0) {
69         while (*str) {
70             hash = (hash * seed) + *str;
71             str++;
72         }
73     } else {
74         for (i = 0; i < len; ++i)
75             hash = (hash * seed) + str[i];
76     }
77 
78     return hash;
79 }
80 
hash_string(struct string * s)81 void hash_string(struct string *s)
82 {
83     s->hash = hash_chars(s->str, s->len);
84 }
85 
init_string(struct string * s,const char * name,size_t len)86 int init_string(struct string *s, const char *name, size_t len)
87 {
88     free_string(s);
89 
90     /*
91      * s->str is allocated and copied,
92      * remember to free it afterwards
93      */
94     s->str = malloc(len + 1);
95     if (!s->str) {
96         return -ENOMEM;
97     }
98 
99 #if DEBUG_MEM_USAGE
100     tmpfs_record_mem_usage(s->str, len + 1, STRING);
101 #endif
102 
103     memcpy(s->str, name, len);
104     s->str[len] = '\0';
105 
106     s->len = len;
107 
108     hash_string(s);
109     return 0;
110 }
111 
free_string(struct string * s)112 void free_string(struct string *s)
113 {
114     if (s->str) {
115 #if DEBUG_MEM_USAGE
116         tmpfs_revoke_mem_usage(s->str, STRING);
117 #endif
118         free(s->str);
119     }
120     s->str = NULL;
121 }
122 
123 /* FS operations */
124 
125 /**
126  * @brief Get the fs's metadata.
127  * @return statbuf A pointer to the caller provided buff, filled with
128  * preset metadata of tmpfs.
129  */
tmpfs_fs_stat(struct statfs * statbuf)130 void tmpfs_fs_stat(struct statfs *statbuf)
131 {
132     const int faked_large_value = 1024 * 1024 * 512;
133     /* Type of filesystem (see below): ChCorE7mpf5 */
134     statbuf->f_type = 0xCCE7A9F5;
135     /* Optimal transfer block size */
136     statbuf->f_bsize = PAGE_SIZE;
137     /* Total data blocks in filesystem */
138     statbuf->f_blocks = faked_large_value;
139     /* Free blocks in filesystem */
140     statbuf->f_bfree = faked_large_value;
141     /* Free blocks available to unprivileged user */
142     statbuf->f_bavail = faked_large_value;
143     /* Total file nodes in filesystem */
144     statbuf->f_files = faked_large_value;
145     /* Free file nodes in filesystem */
146     statbuf->f_ffree = faked_large_value;
147     /* Filesystem ID (See man page) */
148     memset(&statbuf->f_fsid, 0, sizeof(statbuf->f_fsid));
149     /* Maximum length of filenames */
150     statbuf->f_namelen = MAX_PATH;
151     /* Fragment size (since Linux 2.6) */
152     statbuf->f_frsize = 512;
153     /* Mount flags of filesystem (since Linux 2.6.36) */
154     statbuf->f_flags = 0;
155 }
156 
157 /* Base inode operations */
158 
159 /**
160  * @brief Create an empty inode of a particular type.
161  * @param type The type of the inode to be created,
162  * can be one of FS_REG, FS_DIR, FS_SYM.
163  * @param mode The access mode of the file, ignored for now.
164  * @return inode The newly created inode, NULL if type is illegal or out of
165  * memory.
166  * @note The created inode is not ready for use. For example the directory is
167  * empty and has no dot and dotdot dentries.
168  */
tmpfs_inode_init(int type,mode_t mode)169 struct inode *tmpfs_inode_init(int type, mode_t mode)
170 {
171     struct inode *inode = malloc(sizeof(struct inode));
172 
173     if (inode == NULL) {
174         return inode;
175     }
176 
177 #if DEBUG_MEM_USAGE
178     tmpfs_record_mem_usage(inode, sizeof(struct inode), INODE);
179 #endif
180 
181     /* Type-specific fields initialization */
182     switch (type) {
183     case FS_REG:
184         inode->f_ops = &regfile_ops;
185 
186 #if DEBUG_MEM_USAGE
187         init_radix_w_deleter(&inode->data, debug_radix_free);
188 #else
189         init_radix_w_deleter(&inode->data, munmap_page);
190 #endif
191         break;
192     case FS_DIR:
193         inode->d_ops = &dir_ops;
194         init_htable(&inode->dentries, MAX_DIR_HASH_BUCKETS);
195         break;
196     case FS_SYM:
197         inode->sym_ops = &symlink_ops;
198         inode->symlink = NULL;
199         break;
200     default:
201 #if DEBUG_MEM_USAGE
202         tmpfs_revoke_mem_usage(inode, INODE);
203 #endif
204 
205         free(inode);
206         return NULL;
207     }
208 
209     inode->type = type;
210     inode->nlinks = 0; /* ZERO links now */
211     inode->size = 0;
212     inode->mode = mode;
213     inode->opened = false;
214     inode->base_ops = &base_inode_ops;
215 
216     return inode;
217 }
218 
219 /**
220  * @brief Open a file.
221  * @param inode Inode to be opened.
222  * @note Tmpfs does not have anything like an openfile table, and does not to do
223  * reference counting on the opened files, they are all done in fs_base. The
224  * only thing we have to worry is that it is completely legal for a file to be
225  * unlinked when someone is still holding the open file handle and using it, and
226  * we can not free the inode at that time, so a flag indicates the file is
227  * opened is needed.
228  */
tmpfs_inode_open(struct inode * inode)229 static inline void tmpfs_inode_open(struct inode *inode)
230 {
231     inode->opened = true;
232 }
233 
234 /**
235  * @brief Close a file. If the file has no link, free it.
236  * @param inode Inode to be closed
237  * @note As said above, when closing, the file may be unlinked before, we should
238  * clean it up then.
239  */
tmpfs_inode_close(struct inode * inode)240 static void tmpfs_inode_close(struct inode *inode)
241 {
242 #if DEBUG
243     BUG_ON(!inode->opened);
244     BUG_ON(inode->nlinks < 0);
245 #endif
246 
247     inode->opened = false;
248 
249     if (inode->nlinks == 0) {
250         inode->base_ops->free(inode);
251     }
252 }
253 
254 /**
255  * @brief Increase the inode's nlinks by 1.
256  * @param inode
257  */
tmpfs_inode_inc_nlinks(struct inode * inode)258 static inline void tmpfs_inode_inc_nlinks(struct inode *inode)
259 {
260 #if DEBUG
261     BUG_ON(inode->nlinks < 0);
262 #endif
263     inode->nlinks++;
264 }
265 
266 /**
267  * @brief Decrease the inode's nlinks by 1. If nlinks reaches
268  * zero and the inode is not being used, free it.
269  * @param inode
270  */
tmpfs_inode_dec_nlinks(struct inode * inode)271 static void tmpfs_inode_dec_nlinks(struct inode *inode)
272 {
273 #if DEBUG
274     BUG_ON(inode->nlinks <= 0);
275 #endif
276 
277     inode->nlinks--;
278     if (inode->nlinks == 0 && !inode->opened) {
279         inode->base_ops->free(inode);
280     }
281 }
282 
283 /**
284  * @brief Free an inode.
285  * @param inode Inode to be freed.
286  */
tmpfs_inode_free(struct inode * inode)287 static void tmpfs_inode_free(struct inode *inode)
288 {
289     /* freeing type-specific fields */
290     switch (inode->type) {
291     case FS_REG:
292         radix_free(&inode->data);
293         break;
294     case FS_DIR:
295         htable_free(&inode->dentries);
296         break;
297     case FS_SYM:
298         if (inode->symlink) {
299 #if DEBUG_MEM_USAGE
300             tmpfs_revoke_mem_usage(inode->symlink, SYMLINK);
301 #endif
302             free(inode->symlink);
303         }
304         break;
305     default:
306         BUG_ON(1);
307     }
308 
309 #if DEBUG_MEM_USAGE
310     tmpfs_revoke_mem_usage(inode, INODE);
311 #endif
312     free(inode);
313 }
314 
315 /**
316  * @brief Get an inode's metadata.
317  * @param inode The inode to stat.
318  * @return stat The pointer of the buffer to fill the stats in.
319  */
tmpfs_inode_stat(struct inode * inode,struct stat * stat)320 static void tmpfs_inode_stat(struct inode *inode, struct stat *stat)
321 {
322     memset(stat, 0, sizeof(struct stat));
323 
324     /* We currently support only a small part of stat fields */
325     switch (inode->type) {
326     case FS_DIR:
327         stat->st_mode = S_IFDIR;
328         break;
329     case FS_REG:
330         stat->st_mode = S_IFREG;
331         break;
332     case FS_SYM:
333         stat->st_mode = S_IFLNK;
334         break;
335     default:
336         BUG_ON(1);
337     }
338 
339 #if DEBUG
340     BUG_ON(inode->size >= LONG_MAX);
341 #endif
342 
343     stat->st_size = (off_t)inode->size;
344     stat->st_nlink = inode->nlinks;
345     stat->st_ino = (ino_t)(uintptr_t)inode;
346 }
347 
348 /* Regular file operations */
349 
350 /**
351  * @brief read a file's content at offset, and read for size bytes.
352  * @param reg The file to be read.
353  * @param size Read at most size bytes.
354  * @param offset The starting offset of the read.
355  * @param buff The caller provided buffer to be filled with the file content.
356  * @return ssize_t The actual number of bytes that have been read into the
357  * buffer.
358  */
tmpfs_file_read(struct inode * reg,char * buff,size_t size,off_t offset)359 static ssize_t tmpfs_file_read(struct inode *reg, char *buff, size_t size,
360                                off_t offset)
361 {
362 #if DEBUG
363     BUG_ON(reg->type != FS_REG);
364 #endif
365 
366     u64 page_no, page_off;
367     u64 cur_off = offset;
368     size_t to_read;
369     void *page;
370 
371     /* Returns 0 according to man pages. */
372     if (offset >= reg->size)
373         return 0;
374 
375     size = MIN(reg->size - offset, size);
376 
377     while (size > 0 && cur_off <= reg->size) {
378         page_no = cur_off / PAGE_SIZE;
379         page_off = cur_off % PAGE_SIZE;
380 
381         page = radix_get(&reg->data, page_no);
382         to_read = MIN(size, PAGE_SIZE - page_off);
383         if (!page)
384             memset(buff, 0, to_read);
385         else
386             memcpy(buff, page + page_off, to_read);
387         cur_off += to_read;
388         buff += to_read;
389         size -= to_read;
390     }
391 
392     return (ssize_t)(cur_off - offset);
393 }
394 
395 /**
396  * @brief Write a file's content at offset, and write for size bytes.
397  * @param reg The file to be written.
398  * @param buff The caller provided buffer that is to be written into the file.
399  * @param size Write at most size bytes.
400  * @param offset The starting offset of the write.
401  * @return ssize_t The actual number of bytes that have been written.
402  */
tmpfs_file_write(struct inode * reg,const char * buff,size_t len,off_t offset)403 static ssize_t tmpfs_file_write(struct inode *reg, const char *buff, size_t len,
404                                 off_t offset)
405 {
406 #if DEBUG
407     BUG_ON(reg->type != FS_REG);
408 #endif
409 
410     u64 page_no, page_off;
411     off_t cur_off = offset;
412     int to_write;
413     void *page = NULL;
414 
415     if (len == 0)
416         return 0;
417 
418     while (len > 0) {
419         page_no = cur_off / PAGE_SIZE;
420         page_off = cur_off % PAGE_SIZE;
421 
422         page = radix_get(&reg->data, page_no);
423         if (!page) {
424             page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
425             if (!page)
426                 return (ssize_t)(cur_off - offset);
427 
428 #if DEBUG_MEM_USAGE
429             tmpfs_record_mem_usage(page, PAGE_SIZE, DATA_PAGE);
430 #endif
431 
432             if (page_off)
433                 memset(page, 0, page_off);
434             radix_add(&reg->data, page_no, page);
435         }
436 
437 #if DEBUG
438         BUG_ON(page == NULL);
439 #endif
440 
441         to_write = MIN(len, PAGE_SIZE - page_off);
442         memcpy(page + page_off, buff, to_write);
443         cur_off += to_write;
444         buff += to_write;
445         len -= to_write;
446     }
447 
448     if (cur_off > reg->size) {
449         reg->size = cur_off;
450         if (cur_off % PAGE_SIZE && page) {
451             /* if the last write cannot fill the last page, set the
452              * remaining space to zero to ensure the correctness of
453              * the file_read */
454             page_off = cur_off % PAGE_SIZE;
455             memset(page + page_off, 0, PAGE_SIZE - page_off);
456         }
457     }
458     return (ssize_t)(cur_off - offset);
459 }
460 
461 /**
462  * @brief Change a file's size to a length.
463  * @param reg The file to truncate.
464  * @param len The length to change to.
465  * @return 0 on success.
466  * @note When increasing the file's size, we only allocate the memory space in a
467  * lazy fashion. If do need the space to be allocated, consider functions
468  * related to fallocate().
469  */
tmpfs_file_truncate(struct inode * reg,off_t len)470 static int tmpfs_file_truncate(struct inode *reg, off_t len)
471 {
472 #if DEBUG
473     BUG_ON(reg->type != FS_REG);
474 #endif
475 
476     u64 page_no, page_off;
477     void *page;
478     if (len == 0) {
479         /* free radix tree and init an empty one */
480         radix_free(&reg->data);
481 
482 #if DEBUG_MEM_USAGE
483         init_radix_w_deleter(&reg->data, debug_radix_free);
484 #else
485         init_radix_w_deleter(&reg->data, munmap_page);
486 #endif
487         reg->size = 0;
488     } else if (len > reg->size) {
489         /* truncate should not allocate the space for the file */
490         reg->size = len;
491     } else if (len < reg->size) {
492         size_t cur_off = len;
493         size_t to_write;
494         page_no = cur_off / PAGE_SIZE;
495         page_off = cur_off % PAGE_SIZE;
496         if (page_off) {
497             /*
498              * if the last write cannot fill the last page, set the
499              * remaining space to zero to ensure the correctness of
500              * the file_read
501              */
502             page = radix_get(&reg->data, page_no);
503             if (page) {
504                 to_write = MIN(reg->size - len, PAGE_SIZE - page_off);
505                 memset(page + page_off, 0, to_write);
506                 cur_off += to_write;
507             }
508         }
509         while (cur_off < reg->size) {
510             page_no = cur_off / PAGE_SIZE;
511             radix_del(&reg->data, page_no, 1);
512             cur_off += PAGE_SIZE;
513         }
514         reg->size = len;
515     }
516 
517     return 0;
518 }
519 
520 /**
521  * @brief Deallocate disk space(memory in tmpfs) in the range specified by
522  * params offset to offset + len for a file.
523  * @param reg The file to modify.
524  * @param offset The start of the deallocate range.
525  * @param len The length of the deallocate range.
526  * @return int 0 on success, -1 if radix_del() fails.
527  * @note For full pages in the range, they are deleted from the file. For
528  * partial pages in the range, they are set to zero.
529  */
tmpfs_file_punch_hole(struct inode * reg,off_t offset,off_t len)530 static int tmpfs_file_punch_hole(struct inode *reg, off_t offset, off_t len)
531 {
532 #if DEBUG
533     BUG_ON(reg->type != FS_REG);
534 #endif
535 
536     u64 page_no, page_off;
537     u64 cur_off = offset;
538     off_t to_remove;
539     void *page;
540     int err;
541 
542     while (len > 0) {
543         page_no = cur_off / PAGE_SIZE;
544         page_off = cur_off % PAGE_SIZE;
545 
546         to_remove = MIN(len, PAGE_SIZE - page_off);
547         cur_off += to_remove;
548         len -= to_remove;
549         page = radix_get(&reg->data, page_no);
550         if (page) {
551             /*
552              * Linux Manpage:
553              * Within the specified range, partial filesystem blocks
554              * are zeroed, and whole filesystem blocks are removed
555              * from the file.
556              */
557             if (to_remove == PAGE_SIZE || cur_off == reg->size) {
558                 err = radix_del(&reg->data, page_no, 1);
559                 /* if no err, just continue! */
560                 if (err) {
561                     return err;
562                 }
563             } else {
564                 memset(page + page_off, 0, to_remove);
565             }
566         }
567     }
568     return 0;
569 }
570 
571 /**
572  * @brief Collapse file space in the range from offset to offset + len.
573  * @param reg The file to be collapsed.
574  * @param offset The start of the to be collapsed range.
575  * @param len The length of the range.
576  * @return int 0 on success, -EINVAL if the granularity not match or the params
577  are illegal, or -1 if radix_del() fails, or -ENOMEM if radix_add() fails.
578  * @note The range will be *removed*, without leaving a hole in the file. That
579  is, the content start at offset + len will be at offset when this is done, and
580  the file size will be len bytes smaller. Tmpfs only allows this to be done at a
581  page granularity.
582  */
tmpfs_file_collapse_range(struct inode * reg,off_t offset,off_t len)583 static int tmpfs_file_collapse_range(struct inode *reg, off_t offset, off_t len)
584 {
585 #if DEBUG
586     BUG_ON(reg->type != FS_REG);
587 #endif
588 
589     u64 page_no1, page_no2;
590     u64 cur_off = offset;
591     void *page1;
592     void *page2;
593     u64 remain;
594     int err;
595     off_t dist;
596 
597     /* To ensure efficient implementation, offset and len must be a mutiple
598      * of the filesystem logical block size */
599     if (offset % PAGE_SIZE || len % PAGE_SIZE)
600         return -EINVAL;
601     if (offset + len >= reg->size)
602         return -EINVAL;
603 
604     remain = ((reg->size + PAGE_SIZE - 1) - (offset + len)) / PAGE_SIZE;
605     dist = len / PAGE_SIZE;
606     while (remain-- > 0) {
607         page_no1 = cur_off / PAGE_SIZE;
608         page_no2 = page_no1 + dist;
609 
610         cur_off += PAGE_SIZE;
611         page1 = radix_get(&reg->data, page_no1);
612         page2 = radix_get(&reg->data, page_no2);
613         if (page1) {
614             err = radix_del(&reg->data, page_no1, 1);
615             if (err)
616                 goto error;
617         }
618         if (page2) {
619             err = radix_add(&reg->data, page_no1, page2);
620             if (err)
621                 goto error;
622             err = radix_del(&reg->data, page_no2, 0);
623             if (err)
624                 goto error;
625         }
626     }
627 
628     reg->size -= len;
629     return 0;
630 
631 error:
632     error("Error in collapse range!\n");
633     return err;
634 }
635 
636 /**
637  * @brief Zero the given range of a file.
638  * @param reg The file to modify.
639  * @param offset The start of the range.
640  * @param len The length of the range.
641  * @param mode The allocate mode of this call.
642  * @return 0 on success, -ENOSPC when out of disk(memory) space.
643  * @note The range will be set to zero, missing pages will be allocated. If
644  * FALLOC_FL_KEEP_SIZE is set in mode, the file size will not be changed.
645  */
tmpfs_file_zero_range(struct inode * reg,off_t offset,off_t len,mode_t mode)646 static int tmpfs_file_zero_range(struct inode *reg, off_t offset, off_t len,
647                                  mode_t mode)
648 {
649 #if DEBUG
650     BUG_ON(reg->type != FS_REG);
651 #endif
652 
653     u64 page_no, page_off;
654     u64 cur_off = offset;
655     off_t length = len;
656     off_t to_zero;
657     void *page;
658 
659     while (len > 0) {
660         page_no = cur_off / PAGE_SIZE;
661         page_off = cur_off % PAGE_SIZE;
662 
663         to_zero = MIN(len, PAGE_SIZE - page_off);
664         cur_off += to_zero;
665         len -= to_zero;
666         if (!len)
667             to_zero = PAGE_SIZE;
668         page = radix_get(&reg->data, page_no);
669         if (!page) {
670             page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
671             if (!page)
672                 return -ENOSPC;
673 #if DEBUG_MEM_USAGE
674             tmpfs_record_mem_usage(page, PAGE_SIZE, DATA_PAGE);
675 #endif
676             radix_add(&reg->data, page_no, page);
677         }
678 
679 #if DEBUG
680         BUG_ON(!page);
681 #endif
682 
683         memset(page + page_off, 0, to_zero);
684     }
685 
686     if ((!(mode & FALLOC_FL_KEEP_SIZE)) && (offset + length > reg->size))
687         reg->size = offset + length;
688 
689     return 0;
690 }
691 
692 /**
693  * @brief Increasing a file's space.
694  * @param reg The file to modify.
695  * @param offset The start of the range.
696  * @param len The length of the range.
697  * @return int 0 on success, -EINVAL if the granularity not match or the params
698  are illegal, or -1 if radix_del() fails, or -ENOMEM if radix_add() fails.
699  * @note The operation inserts spaces at offset with length len, that is: the
700  content starting at the offset will be moved to offset + len, and the size of
701  the file increased by len. Tmpfs only allows this to happen at page
702  granularity.
703  */
tmpfs_file_insert_range(struct inode * reg,off_t offset,off_t len)704 static int tmpfs_file_insert_range(struct inode *reg, off_t offset, off_t len)
705 {
706 #if DEBUG
707     BUG_ON(reg->type != FS_REG);
708 #endif
709 
710     u64 page_no1, page_no2;
711     void *page;
712     int err;
713     off_t dist;
714 
715     /* To ensure efficient implementation, this mode has the same
716      * limitations as FALLOC_FL_COLLAPSE_RANGE regarding the granularity of
717      * the operation. (offset and len must be a mutiple of the filesystem
718      * logical block size) */
719     if (offset % PAGE_SIZE || len % PAGE_SIZE)
720         return -EINVAL;
721     /* If the offset is equal to or greater than the EOF, an error is
722      * returned. For such operations, ftruncate should be used. */
723     if (offset >= reg->size)
724         return -EINVAL;
725 
726     page_no1 = (reg->size + PAGE_SIZE - 1) / PAGE_SIZE;
727     dist = len / PAGE_SIZE;
728     while (page_no1 >= offset / PAGE_SIZE) {
729         page_no2 = page_no1 + dist;
730 #if DEBUG
731         BUG_ON(radix_get(&reg->data, page_no2));
732 #endif
733         page = radix_get(&reg->data, page_no1);
734         if (page) {
735             err = radix_del(&reg->data, page_no1, 0);
736             if (err)
737                 goto error;
738             err = radix_add(&reg->data, page_no2, page);
739             if (err)
740                 goto error;
741         }
742         page_no1--;
743     }
744 
745     reg->size += len;
746     return 0;
747 
748 error:
749     error("Error in insert range!\n");
750     return err;
751 }
752 
753 /**
754  * @brief Allocate disk space(memory in tmpfs) for the file, from offset to
755  * offset + len, and zero any newly allocated memory space. This ensures later
756  * operations within the allocated range will not fail because of lack of memory
757  * space.
758  * @param reg The file to operate with.
759  * @param offset The operation starts at offset.
760  * @param len The length of the allocated range.
761  * @param keep_size If keep_size is set, the file size will not be changed.
762  * Otherwise, if offset + len > reg->size, reg->size will be changed to offset +
763  * len.
764  * @return int 0 on success, -ENOSPC if not enough disk(memory in tmpfs) space.
765  */
tmpfs_file_allocate(struct inode * reg,off_t offset,off_t len,int keep_size)766 static int tmpfs_file_allocate(struct inode *reg, off_t offset, off_t len,
767                                int keep_size)
768 {
769 #if DEBUG
770     BUG_ON(reg->type != FS_REG);
771 #endif
772 
773     u64 page_no;
774     u64 cur_off = offset;
775     void *page;
776 
777     while (cur_off < offset + len) {
778         page_no = cur_off / PAGE_SIZE;
779 
780         page = radix_get(&reg->data, page_no);
781         if (!page) {
782             page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
783             if (!page)
784                 return -ENOSPC;
785 #if DEBUG_MEM_USAGE
786             tmpfs_record_mem_usage(page, PAGE_SIZE, DATA_PAGE);
787 #endif
788 
789             if (radix_add(&reg->data, page_no, page)) {
790 #if DEBUG_MEM_USAGE
791                 tmpfs_revoke_mem_usage(page, DATA_PAGE);
792 #endif
793                 munmap_page(page);
794                 return -ENOSPC;
795             }
796             memset(page, 0, PAGE_SIZE);
797         }
798         cur_off += PAGE_SIZE;
799     }
800 
801     if (offset + len > reg->size && !keep_size) {
802         reg->size = offset + len;
803     }
804     return 0;
805 }
806 
807 /* Directory operations */
808 
809 /**
810  * @brief Allocate a dentry, and initialize it with the parent dentry.
811  * @param d_parent The parent dentry of the to-be-allocated dentry.
812  * @return The newly created dentry
813  */
tmpfs_alloc_dent()814 static struct dentry *tmpfs_alloc_dent()
815 {
816     struct dentry *dentry = malloc(sizeof(struct dentry));
817     if (!dentry) {
818         return CHCORE_ERR_PTR(-ENOMEM);
819     }
820 
821 #if DEBUG_MEM_USAGE
822     tmpfs_record_mem_usage(dentry, sizeof(struct dentry), DENTRY);
823 #endif
824 
825     dentry->inode = NULL;
826     dentry->name.str = NULL;
827     return dentry;
828 }
829 
830 /**
831  * @brief Free a dentry and its name string.
832  * @param inode The inode which does this operation, and should be the
833  * to-be-freed dentry's parent directory.
834  * @param dentry The dentry that is to be freed.
835  */
tmpfs_free_dent(struct dentry * dentry)836 static void tmpfs_free_dent(struct dentry *dentry)
837 {
838     free_string(&dentry->name);
839 
840 #if DEBUG_MEM_USAGE
841     tmpfs_revoke_mem_usage(dentry, DENTRY);
842 #endif
843     free(dentry);
844 }
845 
846 /**
847  * @brief Add a just allocated dentry into a directory, and adjust the dir's
848  * size.
849  * @param dir The directory to add the new dentry.
850  * @param new_dent The newly created dentry that will be added to the dir.
851  * @param name The name of the dentry.
852  * @param len The length of the name.
853  * @return int 0 for success, -ENOMEM if init_string() fails.
854  * @note This function does not create the new dentry, it presuppose a created
855  * dentry.
856  */
tmpfs_dir_add_dent(struct inode * dir,struct dentry * new_dent,char * name,size_t len)857 static int tmpfs_dir_add_dent(struct inode *dir, struct dentry *new_dent,
858                               char *name, size_t len)
859 {
860 #if DEBUG
861     BUG_ON(dir->type != FS_DIR);
862 #endif
863 
864     int err;
865 
866     err = init_string(&new_dent->name, name, len); /* copy the str */
867     if (err) {
868         return err;
869     }
870 
871     htable_add(&dir->dentries, (u32)(new_dent->name.hash), &new_dent->node);
872     dir->size += DENT_SIZE;
873 
874     return 0;
875 }
876 
877 /**
878  * @brief Remove a dentry from the dir's htable, adjust the size.
879  * @param dir the directory to remove the dentry from.
880  * @param dentry the dentry to be removed.
881  * @note This operation only operate on the directory, *dentry is not freed
882  */
tmpfs_dir_remove_dent(struct inode * dir,struct dentry * dentry)883 static void tmpfs_dir_remove_dent(struct inode *dir, struct dentry *dentry)
884 {
885 #if DEBUG
886     BUG_ON(dir->type != FS_DIR);
887 #endif
888 
889     htable_del(&dentry->node);
890     dir->size -= DENT_SIZE;
891     /* not freeing the dentry now */
892 }
893 
894 /**
895  * @brief Check if the directory is an empty and legal one.
896  * @param dir The directory to be checked.
897  * @return bool true for empty and legal, false otherwise.
898  * @note By saying empty and legal, we mean the directory should *have and only
899  * have* the dot dentry and the dotdot dentry in its dentries table.
900  */
tmpfs_dir_empty(struct inode * dir)901 static bool tmpfs_dir_empty(struct inode *dir)
902 {
903 #if DEBUG
904     BUG_ON(dir->type != FS_DIR);
905 #endif
906 
907     struct dentry *iter;
908     int i = 0;
909     int ret;
910 
911     bool found_dot = false, found_dotdot = false;
912 
913     for_each_in_htable (iter, i, node, &dir->dentries) {
914         if ((ret = strcmp(iter->name.str, ".")) != 0
915             && strcmp(iter->name.str, "..") != 0) {
916             return false;
917         }
918 
919         if (ret == 0) {
920             found_dot = true;
921         } else {
922             found_dotdot = true;
923         }
924     }
925 
926     return found_dot && found_dotdot;
927 }
928 
929 /**
930  * @brief Link the dentry with the inode, and do nothing else.
931  * @param dir the parent directory which the dentry belongs to.
932  * @param dentry The dentry, which has already been added to the directory, to
933  * hold the inode.
934  * @param inode The inode to be linked with the dentry.
935  */
tmpfs_dir_link(struct inode * dir,struct dentry * dentry,struct inode * inode)936 static void tmpfs_dir_link(struct inode *dir, struct dentry *dentry,
937                            struct inode *inode)
938 {
939 #if DEBUG
940     BUG_ON(dir->type != FS_DIR);
941 #endif
942 
943     dentry->inode = inode;
944     inode->base_ops->inc_nlinks(inode);
945 }
946 
947 /**
948  * @brief Unlink the dentry with its inode, and also remove the dentry from the
949  * directory and free the dentry.
950  * @param dir The parent directory which has the dentry in it.
951  * @param dentry The dentry to be unlinked.
952  */
tmpfs_dir_unlink(struct inode * dir,struct dentry * dentry)953 static void tmpfs_dir_unlink(struct inode *dir, struct dentry *dentry)
954 {
955 #if DEBUG
956     BUG_ON(dir->type != FS_DIR);
957     BUG_ON(dentry->inode == NULL);
958 #endif
959 
960     struct inode *inode = dentry->inode;
961 
962     dir->d_ops->remove_dentry(dir, dentry);
963     dir->d_ops->free_dentry(dentry);
964     inode->base_ops->dec_nlinks(inode);
965 }
966 
967 /**
968  * @brief Make a new directory under a parent dir.
969  * @param dir The parent dir to hold the new directory.
970  * @param dentry The dentry of the new directory.
971  * @param mode The access mode of the dir, ignored.
972  * @return int 0 on success, -ENOMEM on failure.
973  * @return inode Upon success, a new directory inode is created and can be
974  * accessed by dentry->inode.
975  */
tmpfs_dir_mkdir(struct inode * dir,struct dentry * dentry,mode_t mode)976 static int tmpfs_dir_mkdir(struct inode *dir, struct dentry *dentry,
977                            mode_t mode)
978 {
979 #if DEBUG
980     BUG_ON(dir->type != FS_DIR);
981 #endif
982 
983     int err = dir->d_ops->mknod(dir, dentry, mode, FS_DIR);
984     if (err) {
985         return err;
986     }
987 
988     struct inode *new_dir = dentry->inode;
989 
990     struct dentry *dot = new_dir->d_ops->alloc_dentry();
991     if (CHCORE_IS_ERR(dot)) {
992         err = CHCORE_PTR_ERR(dot);
993         goto free_node;
994     }
995 
996     err = new_dir->d_ops->add_dentry(new_dir, dot, ".", 1);
997     if (err) {
998         goto free_dot;
999     }
1000 
1001     new_dir->d_ops->link(new_dir, dot, new_dir);
1002 
1003     struct dentry *dotdot = new_dir->d_ops->alloc_dentry();
1004     if (CHCORE_IS_ERR(dotdot)) {
1005         err = CHCORE_PTR_ERR(dotdot);
1006         goto remove_dot;
1007     }
1008 
1009     err = new_dir->d_ops->add_dentry(new_dir, dotdot, "..", 2);
1010     if (err) {
1011         goto free_dotdot;
1012     }
1013 
1014     new_dir->d_ops->link(new_dir, dotdot, dir);
1015 
1016     return 0;
1017 
1018 free_dotdot:
1019     new_dir->d_ops->free_dentry(dotdot);
1020 remove_dot:
1021     new_dir->d_ops->remove_dentry(new_dir, dot);
1022 free_dot:
1023     new_dir->d_ops->free_dentry(dot);
1024 free_node:
1025     dentry->inode->base_ops->free(dentry->inode);
1026     /* the caller-allocated dentry is not freed */
1027     dentry->inode = NULL;
1028 
1029     return err;
1030 }
1031 
1032 /**
1033  * @brief Remove an empty directory and its dentry under the parent directory.
1034  * @param dir The parent directory which holds the to-be-removed directory.
1035  * @param dentry The dentry of the directory to remove.
1036  * @return int 0 on success, -ENOTEMPTY if the to-be-removed directory is not
1037  * empty.
1038  */
tmpfs_dir_rmdir(struct inode * dir,struct dentry * dentry)1039 static int tmpfs_dir_rmdir(struct inode *dir, struct dentry *dentry)
1040 {
1041 #if DEBUG
1042     BUG_ON(dir->type != FS_DIR);
1043     BUG_ON(dentry->inode->type != FS_DIR);
1044 #endif
1045 
1046     struct inode *to_remove = dentry->inode;
1047 
1048     if (!to_remove->d_ops->is_empty(to_remove)) {
1049         return -ENOTEMPTY;
1050     }
1051 
1052     struct dentry *dot = to_remove->d_ops->dirlookup(to_remove, ".", 1);
1053     struct dentry *dotdot = to_remove->d_ops->dirlookup(to_remove, "..", 2);
1054 
1055 #if DEBUG
1056     BUG_ON(dot == NULL || dotdot == NULL);
1057 #endif
1058 
1059     to_remove->d_ops->unlink(to_remove, dot);
1060     to_remove->d_ops->unlink(to_remove, dotdot);
1061 
1062     dir->d_ops->unlink(dir, dentry);
1063 
1064     return 0;
1065 }
1066 
1067 /*
1068  * All illegal cases have been handled outside
1069  * Call to this function is guaranteed to be legal and will succeed
1070  */
1071 /**
1072  * @brief Rename an inode by reclaiming its dentry.
1073  * @param old_dir The parent directory where the inode was under.
1074  * @param old_dentry The dentry for the inode under old_dir.
1075  * @param new_dir The parent directory where the inode is moving to.
1076  * @param new_dentry The dentry that the inode is moving to. Should be added to
1077  * the new_dir already.
1078  * @note The rename() system call is very different with this simple function
1079  * because of all the corner cases and checks. This operation is simple because
1080  * it presupposes all the checks have been done outside before the call to this
1081  * function so it can only handle the legal case and is guaranteed to succeed.
1082  */
tmpfs_dir_rename(struct inode * old_dir,struct dentry * old_dentry,struct inode * new_dir,struct dentry * new_dentry)1083 static void tmpfs_dir_rename(struct inode *old_dir, struct dentry *old_dentry,
1084                              struct inode *new_dir, struct dentry *new_dentry)
1085 {
1086 #if DEBUG
1087     BUG_ON(new_dir->type != FS_DIR);
1088     BUG_ON(old_dir->type != FS_DIR);
1089 #endif
1090 
1091     /* we just link the new_dentry to the inode and unlink the old_dentry */
1092     struct inode *inode = old_dentry->inode;
1093 
1094     new_dir->d_ops->link(new_dir, new_dentry, inode); /* link first */
1095     old_dir->d_ops->unlink(old_dir, old_dentry);
1096 
1097     if (inode->type == FS_DIR) {
1098         struct dentry *dotdot = inode->d_ops->dirlookup(inode, "..", 2);
1099 
1100 #if DEBUG
1101         struct dentry *dot = inode->d_ops->dirlookup(inode, ".", 1);
1102         BUG_ON(!dot);
1103         BUG_ON(!dotdot);
1104         BUG_ON(dot->inode != inode);
1105         BUG_ON(dotdot->inode != old_dir);
1106 #endif
1107 
1108         dotdot->inode = new_dir;
1109 
1110         /* dotdot is changed */
1111         old_dir->base_ops->dec_nlinks(old_dir);
1112         new_dir->base_ops->inc_nlinks(new_dir);
1113     }
1114 }
1115 
1116 /**
1117  * @brief Make a new inode under the parent directory.
1118  * @param dir The parent directory to hold the newly created inode.
1119  * @param dentry The dentry for the new inode. It should be already allocated
1120  * and added into the dir.
1121  * @param mode The access mode of this new inode. Ignored.
1122  * @param type The type of the inode. FS_REG, FS_DIR, FS_SYM can be used.
1123  * @return int 0 on success, -ENOMEM if the type is wrong or no enough memory.
1124  * @return inode On success, the newly created inode is returned and can be
1125  * accessed in dentry->inode.
1126  */
tmpfs_dir_mknod(struct inode * dir,struct dentry * dentry,mode_t mode,int type)1127 static int tmpfs_dir_mknod(struct inode *dir, struct dentry *dentry,
1128                            mode_t mode, int type)
1129 {
1130 #if DEBUG
1131     BUG_ON(dir->type != FS_DIR);
1132 #endif
1133 
1134     struct inode *inode = tmpfs_inode_init(type, mode);
1135     if (inode == NULL) {
1136         return -ENOMEM;
1137     }
1138 
1139     /* linking the inode and the dentry */
1140     dir->d_ops->link(dir, dentry, inode);
1141 
1142     return 0;
1143 }
1144 
1145 /**
1146  * @brief Lookup a given dentry name under the directory.
1147  * @param dir The directory to lookup.
1148  * @param name The name of the dentry to find.
1149  * @param len The length of the dentry name.
1150  * @return dentry NULL if not found, a pointer to the dentry if found.
1151  */
tmpfs_dirlookup(struct inode * dir,const char * name,size_t len)1152 static struct dentry *tmpfs_dirlookup(struct inode *dir, const char *name,
1153                                       size_t len)
1154 {
1155 #if DEBUG
1156     BUG_ON(dir->type != FS_DIR);
1157 #endif
1158     u64 hash;
1159     struct hlist_head *head;
1160     struct dentry *iter;
1161 
1162     hash = hash_chars(name, len);
1163 
1164     head = htable_get_bucket(&dir->dentries, (u32)hash);
1165     for_each_in_hlist (iter, node, head) {
1166         if (iter->name.len == len && strncmp(iter->name.str, name, len) == 0) {
1167             return iter;
1168         }
1169     }
1170 
1171     return NULL;
1172 }
1173 
#define DIRENT_NAME_MAX 256

/**
 * @brief Fill one dirent record and advance the output cursor.
 * @param dirpp In/out cursor: on entry it points at the location of the next
 * struct dirent to write, on success it is advanced past the written record.
 * It is left untouched on failure.
 * @param end The end of the dirent buffer (one past its last byte).
 * @param name The name to store in d_name; copied with strlcpy(), so it is
 * truncated to fit DIRENT_NAME_MAX bytes including the terminating NUL.
 * @param off The value stored in d_off.
 * @param type The value stored in d_type.
 * @param ino The value stored in d_ino.
 * @return int The record length written (sizeof(struct dirent)), or -EAGAIN if
 * the remaining buffer space cannot hold another record.
 */
static int tmpfs_dir_fill_dirent(void **dirpp, void *end, char *name, off_t off,
                                 unsigned char type, ino_t ino)
{
    struct dirent *dirp = *dirpp;
    const unsigned short len = sizeof(struct dirent);

    /*
     * Compare the remaining space in bytes instead of computing the
     * past-the-record pointer first: this needs no arithmetic on void * and
     * never forms a pointer beyond the end of the buffer.
     */
    if ((char *)end - (char *)dirp < (ptrdiff_t)len) {
        return -EAGAIN;
    }

    dirp->d_ino = ino;
    dirp->d_off = off;
    dirp->d_reclen = len;
    dirp->d_type = type;
    strlcpy(dirp->d_name, name, DIRENT_NAME_MAX);

    *dirpp = (char *)dirp + len;
    return len;
}
1203 
1204 /**
1205  * @brief Scan a directory's dentries and write them into a buffer.
1206  * @param dir The directory to scan.
1207  * @param start The index in directory's dentry table to scan with.
1208  * @param buf The caller provided buffer of the dirent array.
1209  * @param end The end of the dirent array.
1210  * @return int The number of dentries scanned.
1211  * @return read_bytes The number of bytes written to the buffer.
1212  */
tmpfs_dir_scan(struct inode * dir,unsigned int start,void * buf,void * end,int * read_bytes)1213 static int tmpfs_dir_scan(struct inode *dir, unsigned int start, void *buf,
1214                           void *end, int *read_bytes)
1215 {
1216 #if DEBUG
1217     BUG_ON(dir->type != FS_DIR);
1218 #endif
1219 
1220     int cnt = 0, b, ret;
1221     ino_t ino;
1222     void *p = buf;
1223     unsigned char type;
1224     struct dentry *iter;
1225 
1226     for_each_in_htable (iter, b, node, &dir->dentries) {
1227         if (cnt >= start) {
1228             type = iter->inode->type;
1229             ino = iter->inode->size;
1230 
1231             ret =
1232                 tmpfs_dir_fill_dirent(&p, end, iter->name.str, cnt, type, ino);
1233 
1234             if (ret <= 0) {
1235                 if (read_bytes) {
1236                     *read_bytes = (int)(p - buf);
1237                 }
1238                 return (int)(cnt - start);
1239             }
1240         }
1241         cnt++;
1242     }
1243 
1244     if (read_bytes) {
1245         *read_bytes = (int)(p - buf);
1246     }
1247     return (int)(cnt - start);
1248 }
1249 
1250 /* Symlink operations */
1251 
1252 /**
1253  * @brief Read the content of a symlink
1254  * @param symlink The symlink.
1255  * @param buff The buffer to read the symlink into.
1256  * @param len The length of the buffer
1257  * @return ssize_t The actual bytes read into the buffer.
1258  */
tmpfs_symlink_read(struct inode * symlink,char * buff,size_t len)1259 static ssize_t tmpfs_symlink_read(struct inode *symlink, char *buff, size_t len)
1260 {
1261 #if DEBUG
1262     BUG_ON(symlink->type != FS_SYM);
1263 #endif
1264 
1265     len = len < symlink->size ? len : symlink->size;
1266     memcpy(buff, symlink->symlink, len);
1267     return (ssize_t)len;
1268 }
1269 
1270 /**
1271  * @brief Write a symlink.
1272  * @param symlink The symlink.
1273  * @param buff The buffer of the content to be written.
1274  * @param len The length of the symlink to be written.
1275  * @return ssize_t The length of the link successfully written, or -ENAMETOOLONG
1276  * if the link is too long, or -ENOMEM if there is no enough memory for the link
1277  */
tmpfs_symlink_write(struct inode * symlink,const char * buff,size_t len)1278 static ssize_t tmpfs_symlink_write(struct inode *symlink, const char *buff,
1279                                    size_t len)
1280 {
1281 #if DEBUG
1282     BUG_ON(symlink->type != FS_SYM);
1283 #endif
1284 
1285     if (len > MAX_SYM_LEN) {
1286         return -ENAMETOOLONG;
1287     }
1288 
1289     if (symlink->symlink) {
1290 #if DEBUG_MEM_USAGE
1291         tmpfs_revoke_mem_usage(symlink->symlink, SYMLINK);
1292 #endif
1293         free(symlink->symlink);
1294     }
1295 
1296     symlink->symlink = malloc(len + 1);
1297     if (!symlink->symlink) {
1298         return -ENOMEM;
1299     }
1300 #if DEBUG_MEM_USAGE
1301     tmpfs_record_mem_usage(symlink->symlink, len + 1, SYMLINK);
1302 #endif
1303 
1304     memcpy(symlink->symlink, buff, len);
1305     symlink->symlink[len] = '\0';
1306 
1307     symlink->size = (off_t)len;
1308 
1309     return (ssize_t)len;
1310 }
1311 
1312 struct base_inode_ops base_inode_ops = {
1313     .open = tmpfs_inode_open,
1314     .close = tmpfs_inode_close,
1315     .inc_nlinks = tmpfs_inode_inc_nlinks,
1316     .dec_nlinks = tmpfs_inode_dec_nlinks,
1317     .free = tmpfs_inode_free,
1318     .stat = tmpfs_inode_stat,
1319 };
1320 
1321 struct regfile_ops regfile_ops = {
1322     .read = tmpfs_file_read,
1323     .write = tmpfs_file_write,
1324     .truncate = tmpfs_file_truncate,
1325     .punch_hole = tmpfs_file_punch_hole,
1326     .collapse_range = tmpfs_file_collapse_range,
1327     .zero_range = tmpfs_file_zero_range,
1328     .insert_range = tmpfs_file_insert_range,
1329     .allocate = tmpfs_file_allocate,
1330 };
1331 
1332 struct dir_ops dir_ops = {
1333     .alloc_dentry = tmpfs_alloc_dent,
1334     .free_dentry = tmpfs_free_dent,
1335     .add_dentry = tmpfs_dir_add_dent,
1336     .remove_dentry = tmpfs_dir_remove_dent,
1337     .is_empty = tmpfs_dir_empty,
1338     .dirlookup = tmpfs_dirlookup,
1339     .mknod = tmpfs_dir_mknod,
1340     .link = tmpfs_dir_link,
1341     .unlink = tmpfs_dir_unlink,
1342     .mkdir = tmpfs_dir_mkdir,
1343     .rmdir = tmpfs_dir_rmdir,
1344     .rename = tmpfs_dir_rename,
1345     .scan = tmpfs_dir_scan,
1346 };
1347 
1348 struct symlink_ops symlink_ops = {
1349     .read_link = tmpfs_symlink_read,
1350     .write_link = tmpfs_symlink_write,
1351 };
1352