• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017-2018 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  * Copyright (C) 2021, Alibaba Cloud
6  */
7 #include "internal.h"
8 #include <linux/sched/mm.h>
9 #include <trace/events/erofs.h>
10 
erofs_unmap_metabuf(struct erofs_buf * buf)11 void erofs_unmap_metabuf(struct erofs_buf *buf)
12 {
13 	if (buf->kmap_type == EROFS_KMAP)
14 		kunmap_local(buf->base);
15 	buf->base = NULL;
16 	buf->kmap_type = EROFS_NO_KMAP;
17 }
18 
erofs_put_metabuf(struct erofs_buf * buf)19 void erofs_put_metabuf(struct erofs_buf *buf)
20 {
21 	if (!buf->page)
22 		return;
23 	erofs_unmap_metabuf(buf);
24 	folio_put(page_folio(buf->page));
25 	buf->page = NULL;
26 }
27 
/*
 * Make @buf point at the page covering @offset (relative to buf->off) and
 * optionally map it.
 *
 * Returns a pointer to the byte at @offset when @type is EROFS_KMAP, NULL when
 * @type is EROFS_NO_KMAP, or an ERR_PTR() if the folio could not be read or
 * if @type conflicts with the mapping type already recorded in @buf.
 */
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
		  enum erofs_kmap_type type)
{
	const pgoff_t index = (buf->off + offset) >> PAGE_SHIFT;
	struct folio *folio = NULL;

	if (buf->page)
		folio = page_folio(buf->page);

	/* a different page than the cached one is wanted: drop the old mapping */
	if (folio && folio_file_page(folio, index) != buf->page)
		erofs_unmap_metabuf(buf);

	/* the cached folio (if any) cannot serve this index: read a new one */
	if (!folio || !folio_contains(folio, index)) {
		erofs_put_metabuf(buf);
		folio = read_mapping_folio(buf->mapping, index, buf->file);
		if (IS_ERR(folio))
			return folio;
	}
	buf->page = folio_file_page(folio, index);

	if (buf->kmap_type != EROFS_NO_KMAP) {
		/* an existing mapping must match the requested type */
		if (buf->kmap_type != type) {
			DBG_BUGON(1);
			return ERR_PTR(-EFAULT);
		}
	} else {
		if (type == EROFS_KMAP)
			buf->base = kmap_local_page(buf->page);
		buf->kmap_type = type;
	}

	if (type != EROFS_NO_KMAP)
		return buf->base + (offset & ~PAGE_MASK);
	return NULL;
}
59 
erofs_init_metabuf(struct erofs_buf * buf,struct super_block * sb)60 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
61 {
62 	struct erofs_sb_info *sbi = EROFS_SB(sb);
63 
64 	buf->file = NULL;
65 	buf->off = sbi->dif0.fsoff;
66 	if (erofs_is_fileio_mode(sbi)) {
67 		buf->file = sbi->dif0.file;	/* some fs like FUSE needs it */
68 		buf->mapping = buf->file->f_mapping;
69 	} else if (erofs_is_fscache_mode(sb))
70 		buf->mapping = sbi->dif0.fscache->inode->i_mapping;
71 	else
72 		buf->mapping = sb->s_bdev->bd_mapping;
73 }
74 
/*
 * Convenience wrapper: (re)initialise @buf for @sb, then read the metadata at
 * @offset with the requested mapping @type.  Returns whatever erofs_bread()
 * returns.
 */
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_off_t offset, enum erofs_kmap_type type)
{
	void *kaddr;

	erofs_init_metabuf(buf, sb);
	kaddr = erofs_bread(buf, offset, type);
	return kaddr;
}
81 
erofs_map_blocks_flatmode(struct inode * inode,struct erofs_map_blocks * map)82 static int erofs_map_blocks_flatmode(struct inode *inode,
83 				     struct erofs_map_blocks *map)
84 {
85 	struct erofs_inode *vi = EROFS_I(inode);
86 	struct super_block *sb = inode->i_sb;
87 	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
88 	erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;
89 
90 	map->m_flags = EROFS_MAP_MAPPED;	/* no hole in flat inodes */
91 	if (map->m_la < erofs_pos(sb, lastblk)) {
92 		map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
93 		map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
94 	} else {
95 		DBG_BUGON(!tailendpacking);
96 		map->m_pa = erofs_iloc(inode) + vi->inode_isize +
97 			vi->xattr_isize + erofs_blkoff(sb, map->m_la);
98 		map->m_plen = inode->i_size - map->m_la;
99 
100 		/* inline data should be located in the same meta block */
101 		if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
102 			erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
103 			DBG_BUGON(1);
104 			return -EFSCORRUPTED;
105 		}
106 		map->m_flags |= EROFS_MAP_META;
107 	}
108 	return 0;
109 }
110 
erofs_map_blocks(struct inode * inode,struct erofs_map_blocks * map)111 int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
112 {
113 	struct super_block *sb = inode->i_sb;
114 	struct erofs_inode *vi = EROFS_I(inode);
115 	struct erofs_inode_chunk_index *idx;
116 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
117 	u64 chunknr;
118 	unsigned int unit;
119 	erofs_off_t pos;
120 	void *kaddr;
121 	int err = 0;
122 
123 	trace_erofs_map_blocks_enter(inode, map, 0);
124 	map->m_deviceid = 0;
125 	if (map->m_la >= inode->i_size) {
126 		/* leave out-of-bound access unmapped */
127 		map->m_flags = 0;
128 		map->m_plen = map->m_llen;
129 		goto out;
130 	}
131 
132 	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
133 		err = erofs_map_blocks_flatmode(inode, map);
134 		goto out;
135 	}
136 
137 	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
138 		unit = sizeof(*idx);			/* chunk index */
139 	else
140 		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */
141 
142 	chunknr = map->m_la >> vi->chunkbits;
143 	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
144 		    vi->xattr_isize, unit) + unit * chunknr;
145 
146 	kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
147 	if (IS_ERR(kaddr)) {
148 		err = PTR_ERR(kaddr);
149 		goto out;
150 	}
151 	map->m_la = chunknr << vi->chunkbits;
152 	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
153 			round_up(inode->i_size - map->m_la, sb->s_blocksize));
154 
155 	/* handle block map */
156 	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
157 		__le32 *blkaddr = kaddr;
158 
159 		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
160 			map->m_flags = 0;
161 		} else {
162 			map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr));
163 			map->m_flags = EROFS_MAP_MAPPED;
164 		}
165 		goto out_unlock;
166 	}
167 	/* parse chunk indexes */
168 	idx = kaddr;
169 	switch (le32_to_cpu(idx->blkaddr)) {
170 	case EROFS_NULL_ADDR:
171 		map->m_flags = 0;
172 		break;
173 	default:
174 		map->m_deviceid = le16_to_cpu(idx->device_id) &
175 			EROFS_SB(sb)->device_id_mask;
176 		map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr));
177 		map->m_flags = EROFS_MAP_MAPPED;
178 		break;
179 	}
180 out_unlock:
181 	erofs_put_metabuf(&buf);
182 out:
183 	if (!err)
184 		map->m_llen = map->m_plen;
185 	trace_erofs_map_blocks_exit(inode, map, 0, err);
186 	return err;
187 }
188 
erofs_fill_from_devinfo(struct erofs_map_dev * map,struct super_block * sb,struct erofs_device_info * dif)189 static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
190 		struct super_block *sb, struct erofs_device_info *dif)
191 {
192 	map->m_sb = sb;
193 	map->m_dif = dif;
194 	map->m_bdev = NULL;
195 	if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
196 		map->m_bdev = file_bdev(dif->file);
197 }
198 
erofs_map_dev(struct super_block * sb,struct erofs_map_dev * map)199 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
200 {
201 	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
202 	struct erofs_device_info *dif;
203 	erofs_off_t startoff, length;
204 	int id;
205 
206 	erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
207 	map->m_bdev = sb->s_bdev;	/* use s_bdev for the primary device */
208 	if (map->m_deviceid) {
209 		down_read(&devs->rwsem);
210 		dif = idr_find(&devs->tree, map->m_deviceid - 1);
211 		if (!dif) {
212 			up_read(&devs->rwsem);
213 			return -ENODEV;
214 		}
215 		if (devs->flatdev) {
216 			map->m_pa += erofs_pos(sb, dif->mapped_blkaddr);
217 			up_read(&devs->rwsem);
218 			return 0;
219 		}
220 		erofs_fill_from_devinfo(map, sb, dif);
221 		up_read(&devs->rwsem);
222 	} else if (devs->extra_devices && !devs->flatdev) {
223 		down_read(&devs->rwsem);
224 		idr_for_each_entry(&devs->tree, dif, id) {
225 			if (!dif->mapped_blkaddr)
226 				continue;
227 
228 			startoff = erofs_pos(sb, dif->mapped_blkaddr);
229 			length = erofs_pos(sb, dif->blocks);
230 			if (map->m_pa >= startoff &&
231 			    map->m_pa < startoff + length) {
232 				map->m_pa -= startoff;
233 				erofs_fill_from_devinfo(map, sb, dif);
234 				break;
235 			}
236 		}
237 		up_read(&devs->rwsem);
238 	}
239 	return 0;
240 }
241 
/*
 * bit 30: I/O error occurred on this folio
 * bit 29: CPU has dirty data in D-cache (needs aliasing handling)
 * bit 0 - 28: remaining parts to complete this folio
 */
247 #define EROFS_ONLINEFOLIO_EIO		30
248 #define EROFS_ONLINEFOLIO_DIRTY		29
249 
erofs_onlinefolio_init(struct folio * folio)250 void erofs_onlinefolio_init(struct folio *folio)
251 {
252 	union {
253 		atomic_t o;
254 		void *v;
255 	} u = { .o = ATOMIC_INIT(1) };
256 
257 	folio->private = u.v;	/* valid only if file-backed folio is locked */
258 }
259 
erofs_onlinefolio_split(struct folio * folio)260 void erofs_onlinefolio_split(struct folio *folio)
261 {
262 	atomic_inc((atomic_t *)&folio->private);
263 }
264 
/*
 * Complete one pending part of @folio.
 *
 * Atomically decrements the counter kept in folio->private while OR-ing in
 * the EIO flag (if @err is non-zero) and the DIRTY flag (if @dirty).  When
 * the counter bits reach zero, folio->private is cleared, the D-cache is
 * flushed if the DIRTY flag was set, and the read is ended with success
 * unless the EIO flag was set.
 */
void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
	atomic_t *state = (atomic_t *)&folio->private;
	int orig, v;

	for (;;) {
		orig = atomic_read(state);
		DBG_BUGON(orig <= 0);
		v = (orig - 1) | (dirty << EROFS_ONLINEFOLIO_DIRTY) |
		    (!!err << EROFS_ONLINEFOLIO_EIO);
		if (atomic_cmpxchg(state, orig, v) == orig)
			break;
	}

	/* other parts are still outstanding */
	if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
		return;

	folio->private = 0;
	if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
		flush_dcache_folio(folio);
	folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}
283 
/*
 * iomap ->iomap_begin handler: describe the extent of @inode covering
 * [@offset, @offset + @length) in @iomap.
 *
 * The extent is reported as IOMAP_HOLE when unmapped, IOMAP_INLINE when the
 * data lives in metadata (the mapped metadata buffer is remembered in
 * iomap->private so that erofs_iomap_end() can release it), and IOMAP_MAPPED
 * otherwise.  @srcmap is unused.
 *
 * Returns 0 on success or a negative errno.
 */
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	/*
	 * Zero-initialise the request: erofs_map_blocks() leaves m_pa untouched
	 * for unmapped extents, yet it is passed on to erofs_map_dev() below,
	 * so it must not hold an indeterminate value.
	 */
	struct erofs_map_blocks map = {
		.m_la = offset,
		.m_llen = length,
	};
	struct erofs_map_dev mdev;

	ret = erofs_map_blocks(inode, &map);
	if (ret < 0)
		return ret;

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	iomap->offset = map.m_la;
	if (flags & IOMAP_DAX)
		iomap->dax_dev = mdev.m_dif->dax_dev;
	else
		iomap->bdev = mdev.m_bdev;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		/* never report a zero-length hole; fall back to the request */
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr;
		/* keep the mapping base so ->iomap_end can unmap and put it */
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dif->dax_part_off;
	}
	return 0;
}
342 
/*
 * iomap ->iomap_end handler: if the begin handler stashed a mapped metadata
 * address in iomap->private, rebuild the erofs_buf around it and release it.
 * Always returns @written.
 */
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *kaddr = iomap->private;

	if (!kaddr) {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	} else {
		struct erofs_buf buf = {
			.page = kmap_to_page(kaddr),
			.base = kaddr,
			.kmap_type = EROFS_KMAP,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	}
	return written;
}
362 
363 static const struct iomap_ops erofs_iomap_ops = {
364 	.iomap_begin = erofs_iomap_begin,
365 	.iomap_end = erofs_iomap_end,
366 };
367 
/*
 * Report the extent map of @inode through iomap_fiemap().  Compressed inodes
 * use the compressed-data report ops when CONFIG_EROFS_FS_ZIP is enabled and
 * fail with -EOPNOTSUPP otherwise; all other inodes use erofs_iomap_ops.
 */
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	const struct iomap_ops *ops = &erofs_iomap_ops;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		ops = &z_erofs_iomap_report_ops;
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, ops);
}
381 
382 /*
383  * since we dont have write or truncate flows, so no inode
384  * locking needs to be held at the moment.
385  */
erofs_read_folio(struct file * file,struct folio * folio)386 static int erofs_read_folio(struct file *file, struct folio *folio)
387 {
388 	trace_erofs_read_folio(folio, true);
389 
390 	return iomap_read_folio(folio, &erofs_iomap_ops);
391 }
392 
erofs_readahead(struct readahead_control * rac)393 static void erofs_readahead(struct readahead_control *rac)
394 {
395 	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
396 					readahead_count(rac), true);
397 
398 	return iomap_readahead(rac, &erofs_iomap_ops);
399 }
400 
/* ->bmap handler: resolve logical @block of @mapping through iomap_bmap(). */
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	sector_t phys = iomap_bmap(mapping, block, &erofs_iomap_ops);

	return phys;
}
405 
erofs_file_read_iter(struct kiocb * iocb,struct iov_iter * to)406 static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
407 {
408 	struct inode *inode = file_inode(iocb->ki_filp);
409 
410 	/* no need taking (shared) inode lock since it's a ro filesystem */
411 	if (!iov_iter_count(to))
412 		return 0;
413 
414 #ifdef CONFIG_FS_DAX
415 	if (IS_DAX(inode))
416 		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
417 #endif
418 	if (iocb->ki_flags & IOCB_DIRECT) {
419 		struct block_device *bdev = inode->i_sb->s_bdev;
420 		unsigned int blksize_mask;
421 
422 		if (bdev)
423 			blksize_mask = bdev_logical_block_size(bdev) - 1;
424 		else
425 			blksize_mask = i_blocksize(inode) - 1;
426 
427 		if ((iocb->ki_pos | iov_iter_count(to) |
428 		     iov_iter_alignment(to)) & blksize_mask)
429 			return -EINVAL;
430 
431 		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
432 				    NULL, 0, NULL, 0);
433 	}
434 	return filemap_read(iocb, to, 0);
435 }
436 
437 /* for uncompressed (aligned) files and raw access for other files */
438 const struct address_space_operations erofs_aops = {
439 	.read_folio = erofs_read_folio,
440 	.readahead = erofs_readahead,
441 	.bmap = erofs_bmap,
442 	.direct_IO = noop_direct_IO,
443 	.release_folio = iomap_release_folio,
444 	.invalidate_folio = iomap_invalidate_folio,
445 };
446 
447 #ifdef CONFIG_FS_DAX
erofs_dax_huge_fault(struct vm_fault * vmf,unsigned int order)448 static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
449 		unsigned int order)
450 {
451 	return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
452 }
453 
erofs_dax_fault(struct vm_fault * vmf)454 static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
455 {
456 	return erofs_dax_huge_fault(vmf, 0);
457 }
458 
459 static const struct vm_operations_struct erofs_dax_vm_ops = {
460 	.fault		= erofs_dax_fault,
461 	.huge_fault	= erofs_dax_huge_fault,
462 };
463 
erofs_file_mmap(struct file * file,struct vm_area_struct * vma)464 static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
465 {
466 	if (!IS_DAX(file_inode(file)))
467 		return generic_file_readonly_mmap(file, vma);
468 
469 	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
470 		return -EINVAL;
471 
472 	vma->vm_ops = &erofs_dax_vm_ops;
473 	vm_flags_set(vma, VM_HUGEPAGE);
474 	return 0;
475 }
476 #else
477 #define erofs_file_mmap	generic_file_readonly_mmap
478 #endif
479 
480 const struct file_operations erofs_file_fops = {
481 	.llseek		= generic_file_llseek,
482 	.read_iter	= erofs_file_read_iter,
483 	.mmap		= erofs_file_mmap,
484 	.get_unmapped_area = thp_get_unmapped_area,
485 	.splice_read	= filemap_splice_read,
486 };
487