• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3  * Copyright (C) 2018 HUAWEI, Inc.
4  *             http://www.huawei.com/
5  * Created by Li Guifu <bluce.liguifu@huawei.com>
6  */
7 #ifndef _LARGEFILE64_SOURCE
8 #define _LARGEFILE64_SOURCE
9 #endif
10 #ifndef _GNU_SOURCE
11 #define _GNU_SOURCE
12 #endif
13 #include <stdlib.h>
14 #include <sys/stat.h>
15 #include <sys/ioctl.h>
16 #include "erofs/io.h"
17 #ifdef HAVE_LINUX_FS_H
18 #include <linux/fs.h>
19 #endif
20 #ifdef HAVE_LINUX_FALLOC_H
21 #include <linux/falloc.h>
22 #endif
23 #ifdef HAVE_SYS_STATFS_H
24 #include <sys/statfs.h>
25 #endif
26 #define EROFS_MODNAME	"erofs_io"
27 #include "erofs/print.h"
28 
/*
 * Query the size in bytes of the block device behind @fd.
 *
 * BLKGETSIZE64 is tried first when it is available at build time; the
 * legacy BLKGETSIZE ioctl (which reports 512-byte sectors) is the
 * fallback.  Returns 0 and fills *@bytes on success, or a negative
 * errno value.  If neither ioctl is compiled in, -ENOTSUP is returned.
 */
static int dev_get_blkdev_size(int fd, u64 *bytes)
{
	/* reported as-is when no ioctl below is compiled in */
	errno = ENOTSUP;

#ifdef BLKGETSIZE64
	if (ioctl(fd, BLKGETSIZE64, bytes) >= 0)
		return 0;
#endif

#ifdef BLKGETSIZE
	{
		unsigned long nr_sectors;

		if (ioctl(fd, BLKGETSIZE, &nr_sectors) < 0)
			return -errno;

		/* BLKGETSIZE counts 512-byte sectors */
		*bytes = ((u64)nr_sectors << 9);
		return 0;
	}
#else
	return -errno;
#endif
}
48 
dev_close(struct erofs_sb_info * sbi)49 void dev_close(struct erofs_sb_info *sbi)
50 {
51 	close(sbi->devfd);
52 	free(sbi->devname);
53 	sbi->devname = NULL;
54 	sbi->devfd   = -1;
55 	sbi->devsz   = 0;
56 }
57 
dev_open(struct erofs_sb_info * sbi,const char * dev)58 int dev_open(struct erofs_sb_info *sbi, const char *dev)
59 {
60 	struct stat st;
61 	int fd, ret;
62 
63 #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
64 	bool again = false;
65 
66 repeat:
67 #endif
68 	fd = open(dev, O_RDWR | O_CREAT | O_BINARY, 0644);
69 	if (fd < 0) {
70 		erofs_err("failed to open(%s).", dev);
71 		return -errno;
72 	}
73 
74 	ret = fstat(fd, &st);
75 	if (ret) {
76 		erofs_err("failed to fstat(%s).", dev);
77 		close(fd);
78 		return -errno;
79 	}
80 
81 	switch (st.st_mode & S_IFMT) {
82 	case S_IFBLK:
83 		ret = dev_get_blkdev_size(fd, &sbi->devsz);
84 		if (ret) {
85 			erofs_err("failed to get block device size(%s).", dev);
86 			close(fd);
87 			return ret;
88 		}
89 		sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
90 		break;
91 	case S_IFREG:
92 		if (st.st_size) {
93 #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
94 			struct statfs stfs;
95 
96 			if (again)
97 				return -ENOTEMPTY;
98 
99 			/*
100 			 * fses like EXT4 and BTRFS will flush dirty blocks
101 			 * after truncate(0) even after the writeback happens
102 			 * (see kernel commit 7d8f9f7d150d and ccd2506bd431),
103 			 * which is NOT our intention.  Let's work around this.
104 			 */
105 			if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
106 					stfs.f_type == 0x9123683E)) {
107 				close(fd);
108 				unlink(dev);
109 				again = true;
110 				goto repeat;
111 			}
112 #endif
113 			ret = ftruncate(fd, 0);
114 			if (ret) {
115 				erofs_err("failed to ftruncate(%s).", dev);
116 				close(fd);
117 				return -errno;
118 			}
119 		}
120 		/* INT64_MAX is the limit of kernel vfs */
121 		sbi->devsz = INT64_MAX;
122 		sbi->devblksz = st.st_blksize;
123 		break;
124 	default:
125 		erofs_err("bad file type (%s, %o).", dev, st.st_mode);
126 		close(fd);
127 		return -EINVAL;
128 	}
129 
130 	sbi->devname = strdup(dev);
131 	if (!sbi->devname) {
132 		close(fd);
133 		return -ENOMEM;
134 	}
135 	sbi->devfd = fd;
136 
137 	erofs_info("successfully to open %s", dev);
138 	return 0;
139 }
140 
blob_closeall(struct erofs_sb_info * sbi)141 void blob_closeall(struct erofs_sb_info *sbi)
142 {
143 	unsigned int i;
144 
145 	for (i = 0; i < sbi->nblobs; ++i)
146 		close(sbi->blobfd[i]);
147 	sbi->nblobs = 0;
148 }
149 
blob_open_ro(struct erofs_sb_info * sbi,const char * dev)150 int blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
151 {
152 	int fd = open(dev, O_RDONLY | O_BINARY);
153 
154 	if (fd < 0) {
155 		erofs_err("failed to open(%s).", dev);
156 		return -errno;
157 	}
158 
159 	sbi->blobfd[sbi->nblobs] = fd;
160 	erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
161 	++sbi->nblobs;
162 	return 0;
163 }
164 
165 /* XXX: temporary solution. Disk I/O implementation needs to be refactored. */
dev_open_ro(struct erofs_sb_info * sbi,const char * dev)166 int dev_open_ro(struct erofs_sb_info *sbi, const char *dev)
167 {
168 	int fd = open(dev, O_RDONLY | O_BINARY);
169 
170 	if (fd < 0) {
171 		erofs_err("failed to open(%s).", dev);
172 		return -errno;
173 	}
174 
175 	sbi->devname = strdup(dev);
176 	if (!sbi->devname) {
177 		close(fd);
178 		return -ENOMEM;
179 	}
180 	sbi->devfd = fd;
181 	sbi->devsz = INT64_MAX;
182 	return 0;
183 }
184 
/*
 * Write @len bytes from @buf to the primary device at byte @offset.
 *
 * Nothing is written in dry-run mode.  The range [offset, offset + len)
 * must lie within sbi->devsz.
 *
 * Returns 0 on success, -EINVAL for a NULL buffer or an out-of-range
 * request, a negative errno value if the write fails, or -ERANGE if
 * fewer than @len bytes were written.
 */
int dev_write(struct erofs_sb_info *sbi, const void *buf, u64 offset, size_t len)
{
	/* ssize_t: an int would truncate the byte count of large writes */
	ssize_t ret;

	if (cfg.c_dry_run)
		return 0;

	if (!buf) {
		erofs_err("buf is NULL");
		return -EINVAL;
	}

	/* the last test is written so that offset + len cannot overflow */
	if (offset >= sbi->devsz || len > sbi->devsz ||
	    offset > sbi->devsz - len) {
		erofs_err("Write posion[%" PRIu64 ", %zu] is too large beyond the end of device(%" PRIu64 ").",
			  offset, len, sbi->devsz);
		return -EINVAL;
	}

#ifdef HAVE_PWRITE64
	ret = pwrite64(sbi->devfd, buf, len, (off64_t)offset);
#else
	ret = pwrite(sbi->devfd, buf, len, (off_t)offset);
#endif
	if (ret < 0) {
		/* capture errno before logging can overwrite it */
		int err = errno;

		erofs_err("Failed to write data into device - %s:[%" PRIu64 ", %zu].",
			  sbi->devname, offset, len);
		return -err;
	}

	if ((size_t)ret != len) {
		erofs_err("Writing data into device - %s:[%" PRIu64 ", %zu] - was truncated.",
			  sbi->devname, offset, len);
		return -ERANGE;
	}
	return 0;
}
222 
/*
 * Zero @len bytes of the primary device starting at byte @offset.
 *
 * Nothing is done in dry-run mode.  When @padding is false and hole
 * punching is available at build time, a punched hole is attempted
 * first; if that is not possible (or @padding is true) zeroes are
 * written explicitly, at most one filesystem block per dev_write().
 *
 * Returns 0 on success or the negative error from dev_write().
 */
int dev_fillzero(struct erofs_sb_info *sbi, u64 offset, size_t len, bool padding)
{
	static const char zeroblk[EROFS_MAX_BLOCK_SIZE] = {0};
	size_t blksz;
	int err;

	if (cfg.c_dry_run)
		return 0;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
	if (!padding && fallocate(sbi->devfd, FALLOC_FL_PUNCH_HOLE |
				  FALLOC_FL_KEEP_SIZE, offset, len) >= 0)
		return 0;
#endif
	blksz = erofs_blksiz(sbi);

	/* full blocks first; the tail (at most one block) goes last */
	for (; len > blksz; len -= blksz, offset += blksz) {
		err = dev_write(sbi, zeroblk, offset, blksz);
		if (err)
			return err;
	}
	return dev_write(sbi, zeroblk, offset, len);
}
245 
dev_fsync(struct erofs_sb_info * sbi)246 int dev_fsync(struct erofs_sb_info *sbi)
247 {
248 	int ret;
249 
250 	ret = fsync(sbi->devfd);
251 	if (ret) {
252 		erofs_err("Could not fsync device!!!");
253 		return -EIO;
254 	}
255 	return 0;
256 }
257 
dev_resize(struct erofs_sb_info * sbi,unsigned int blocks)258 int dev_resize(struct erofs_sb_info *sbi, unsigned int blocks)
259 {
260 	int ret;
261 	struct stat st;
262 	u64 length;
263 
264 	if (cfg.c_dry_run || sbi->devsz != INT64_MAX)
265 		return 0;
266 
267 	ret = fstat(sbi->devfd, &st);
268 	if (ret) {
269 		erofs_err("failed to fstat.");
270 		return -errno;
271 	}
272 
273 	length = (u64)blocks * erofs_blksiz(sbi);
274 	if (st.st_size == length)
275 		return 0;
276 	if (st.st_size > length)
277 		return ftruncate(sbi->devfd, length);
278 
279 	length = length - st.st_size;
280 #if defined(HAVE_FALLOCATE)
281 	if (fallocate(sbi->devfd, 0, st.st_size, length) >= 0)
282 		return 0;
283 #endif
284 	return dev_fillzero(sbi, st.st_size, length, true);
285 }
286 
/*
 * Read @len bytes at byte @offset (plus the global cfg.c_offset) into
 * @buf.
 *
 * @device_id 0 selects the primary device; a value N >= 1 selects blob
 * N-1 from sbi->blobfd[].  Short reads are continued and reads
 * interrupted by a signal (EINTR) are retried at the same position.  If
 * end-of-file is hit early, the unread tail of @buf is zero-filled and
 * the call still succeeds.
 *
 * In dry-run mode the function returns 0 without touching @buf.
 *
 * Returns 0 on success, -EINVAL for a NULL buffer, -ENODEV for an
 * invalid @device_id, or a negative errno value on a read error.
 */
int dev_read(struct erofs_sb_info *sbi, int device_id,
	     void *buf, u64 offset, size_t len)
{
	/* byte pointer: arithmetic on void * is a compiler extension */
	char *dst = buf;
	ssize_t read_count;
	int fd;

	if (cfg.c_dry_run)
		return 0;

	offset += cfg.c_offset;

	if (!buf) {
		erofs_err("buf is NULL");
		return -EINVAL;
	}

	if (!device_id) {
		fd = sbi->devfd;
	} else {
		if (device_id < 0 || device_id > sbi->nblobs) {
			erofs_err("invalid device id %d", device_id);
			return -ENODEV;
		}
		fd = sbi->blobfd[device_id - 1];
	}

	while (len > 0) {
#ifdef HAVE_PREAD64
		read_count = pread64(fd, dst, len, (off64_t)offset);
#else
		read_count = pread(fd, dst, len, (off_t)offset);
#endif
		if (read_count < 0) {
			/* capture errno before logging can overwrite it */
			int err = errno;

			/*
			 * Interrupted: retry without applying the -1 return
			 * value to offset/len/dst.
			 */
			if (err == EINTR)
				continue;

			erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zu].",
				  sbi->devname, offset, len);
			return -err;
		}

		if (!read_count) {
			erofs_info("Reach EOF of device - %s:[%" PRIu64 ", %zu].",
				   sbi->devname, offset, len);
			memset(dst, 0, len);
			return 0;
		}

		offset += read_count;
		len -= read_count;
		dst += read_count;
	}
	return 0;
}
336 
/*
 * Userspace fallback for copy_file_range(): copy up to @length bytes
 * from @fd_in at *@off_in to @fd_out at *@off_out through a bounce
 * buffer, advancing both offsets by what was actually transferred.
 *
 * Returns the number of bytes copied, which may be short if EOF or an
 * error is hit after some progress, or -1 if an error occurs before
 * anything was copied.
 */
static ssize_t __erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
				       int fd_out, erofs_off_t *off_out,
				       size_t length)
{
	size_t total = 0;
	/*
	 * The buffer size is arbitrary: a trade-off between stack usage,
	 * cache footprint and amortization of system call overhead.
	 */
	char chunk[8192];

	while (length > 0) {
		size_t want = min_t(size_t, length, sizeof(chunk));
		size_t flushed = 0;
		ssize_t got;

#ifdef HAVE_PREAD64
		got = pread64(fd_in, chunk, want, *off_in);
#else
		got = pread(fd_in, chunk, want, *off_in);
#endif
		/* premature end of file: report what we have */
		if (got == 0)
			return total;
		/* read error: partial progress wins over the error */
		if (got < 0)
			return total > 0 ? (ssize_t)total : -1;
		*off_in += got;

		/* push everything that was just read to the destination */
		while (flushed < (size_t)got) {
			ssize_t put;

#ifdef HAVE_PWRITE64
			put = pwrite64(fd_out, chunk + flushed,
				       (size_t)got - flushed, *off_out);
#else
			put = pwrite(fd_out, chunk + flushed,
				     (size_t)got - flushed, *off_out);
#endif
			if (put < 0) {
				/*
				 * Rewind the input offset by the bytes that
				 * were read but never written, so the caller
				 * can resume from a consistent position.
				 */
				ssize_t unread = got - flushed;

				*off_in -= unread;
				if (total + flushed > 0)
					return total + flushed;
				return -1;
			}
			flushed += put;
			*off_out += put;
		}
		total += got;
		length -= got;
	}
	return total;
}
408 
/*
 * Copy @length bytes between two descriptors, updating *@off_in and
 * *@off_out.
 *
 * When copy_file_range() is available at build time it is tried first.
 * Its result is returned directly (a negative errno value on failure)
 * unless it fails with ENOSYS or EXDEV, in which case - as when the
 * syscall is not built in at all - the read/write fallback
 * __erofs_copy_file_range() does the work.
 */
ssize_t erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
			      int fd_out, erofs_off_t *off_out,
			      size_t length)
{
#ifdef HAVE_COPY_FILE_RANGE
	off64_t pos_in = *off_in, pos_out = *off_out;
	ssize_t ret;

	ret = copy_file_range(fd_in, &pos_in, fd_out, &pos_out,
			      length, 0);
	/* only ENOSYS/EXDEV failures are handed to the fallback below */
	if (ret >= 0 || (errno != ENOSYS && errno != EXDEV)) {
		if (ret < 0)
			ret = -errno;
		*off_in = pos_in;
		*off_out = pos_out;
		return ret;
	}
#endif
	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
}
431