1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3 * Copyright (C) 2018 HUAWEI, Inc.
4 * http://www.huawei.com/
5 * Created by Li Guifu <bluce.liguifu@huawei.com>
6 */
7 #ifndef _LARGEFILE64_SOURCE
8 #define _LARGEFILE64_SOURCE
9 #endif
10 #ifndef _GNU_SOURCE
11 #define _GNU_SOURCE
12 #endif
13 #include <stdlib.h>
14 #include <sys/stat.h>
15 #include <sys/ioctl.h>
16 #include "erofs/io.h"
17 #ifdef HAVE_LINUX_FS_H
18 #include <linux/fs.h>
19 #endif
20 #ifdef HAVE_LINUX_FALLOC_H
21 #include <linux/falloc.h>
22 #endif
23 #ifdef HAVE_SYS_STATFS_H
24 #include <sys/statfs.h>
25 #endif
26 #define EROFS_MODNAME "erofs_io"
27 #include "erofs/print.h"
28
/*
 * Ask the kernel how large the block device behind @fd is.
 *
 * BLKGETSIZE64 is tried first and stores its result straight into @bytes.
 * If that is unavailable or fails, BLKGETSIZE is tried and its result is
 * scaled by 512 (the original shift by 9) before being stored.
 *
 * Returns 0 on success, otherwise -errno of the last failing ioctl; when
 * neither ioctl exists at build time the result is -ENOTSUP.
 */
static int dev_get_blkdev_size(int fd, u64 *bytes)
{
#ifdef BLKGETSIZE
	unsigned long nr_units;
#endif

	/* default error when no size ioctl is compiled in */
	errno = ENOTSUP;

#ifdef BLKGETSIZE64
	if (ioctl(fd, BLKGETSIZE64, bytes) >= 0)
		return 0;
#endif

#ifdef BLKGETSIZE
	if (ioctl(fd, BLKGETSIZE, &nr_units) >= 0) {
		*bytes = (u64)nr_units * 512;
		return 0;
	}
#endif
	return -errno;
}
48
dev_close(struct erofs_sb_info * sbi)49 void dev_close(struct erofs_sb_info *sbi)
50 {
51 close(sbi->devfd);
52 free(sbi->devname);
53 sbi->devname = NULL;
54 sbi->devfd = -1;
55 sbi->devsz = 0;
56 }
57
dev_open(struct erofs_sb_info * sbi,const char * dev)58 int dev_open(struct erofs_sb_info *sbi, const char *dev)
59 {
60 struct stat st;
61 int fd, ret;
62
63 #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
64 bool again = false;
65
66 repeat:
67 #endif
68 fd = open(dev, O_RDWR | O_CREAT | O_BINARY, 0644);
69 if (fd < 0) {
70 erofs_err("failed to open(%s).", dev);
71 return -errno;
72 }
73
74 ret = fstat(fd, &st);
75 if (ret) {
76 erofs_err("failed to fstat(%s).", dev);
77 close(fd);
78 return -errno;
79 }
80
81 switch (st.st_mode & S_IFMT) {
82 case S_IFBLK:
83 ret = dev_get_blkdev_size(fd, &sbi->devsz);
84 if (ret) {
85 erofs_err("failed to get block device size(%s).", dev);
86 close(fd);
87 return ret;
88 }
89 sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
90 break;
91 case S_IFREG:
92 if (st.st_size) {
93 #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
94 struct statfs stfs;
95
96 if (again)
97 return -ENOTEMPTY;
98
99 /*
100 * fses like EXT4 and BTRFS will flush dirty blocks
101 * after truncate(0) even after the writeback happens
102 * (see kernel commit 7d8f9f7d150d and ccd2506bd431),
103 * which is NOT our intention. Let's work around this.
104 */
105 if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
106 stfs.f_type == 0x9123683E)) {
107 close(fd);
108 unlink(dev);
109 again = true;
110 goto repeat;
111 }
112 #endif
113 ret = ftruncate(fd, 0);
114 if (ret) {
115 erofs_err("failed to ftruncate(%s).", dev);
116 close(fd);
117 return -errno;
118 }
119 }
120 /* INT64_MAX is the limit of kernel vfs */
121 sbi->devsz = INT64_MAX;
122 sbi->devblksz = st.st_blksize;
123 break;
124 default:
125 erofs_err("bad file type (%s, %o).", dev, st.st_mode);
126 close(fd);
127 return -EINVAL;
128 }
129
130 sbi->devname = strdup(dev);
131 if (!sbi->devname) {
132 close(fd);
133 return -ENOMEM;
134 }
135 sbi->devfd = fd;
136
137 erofs_info("successfully to open %s", dev);
138 return 0;
139 }
140
blob_closeall(struct erofs_sb_info * sbi)141 void blob_closeall(struct erofs_sb_info *sbi)
142 {
143 unsigned int i;
144
145 for (i = 0; i < sbi->nblobs; ++i)
146 close(sbi->blobfd[i]);
147 sbi->nblobs = 0;
148 }
149
blob_open_ro(struct erofs_sb_info * sbi,const char * dev)150 int blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
151 {
152 int fd = open(dev, O_RDONLY | O_BINARY);
153
154 if (fd < 0) {
155 erofs_err("failed to open(%s).", dev);
156 return -errno;
157 }
158
159 sbi->blobfd[sbi->nblobs] = fd;
160 erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
161 ++sbi->nblobs;
162 return 0;
163 }
164
165 /* XXX: temporary soluation. Disk I/O implementation needs to be refactored. */
dev_open_ro(struct erofs_sb_info * sbi,const char * dev)166 int dev_open_ro(struct erofs_sb_info *sbi, const char *dev)
167 {
168 int fd = open(dev, O_RDONLY | O_BINARY);
169
170 if (fd < 0) {
171 erofs_err("failed to open(%s).", dev);
172 return -errno;
173 }
174
175 sbi->devname = strdup(dev);
176 if (!sbi->devname) {
177 close(fd);
178 return -ENOMEM;
179 }
180 sbi->devfd = fd;
181 sbi->devsz = INT64_MAX;
182 return 0;
183 }
184
/*
 * Write @len bytes from @buf to the device of @sbi at byte @offset.
 *
 * Nothing is written in dry-run mode (cfg.c_dry_run).  The range
 * [offset, offset + len) must lie inside sbi->devsz.
 *
 * Returns 0 on success, -EINVAL for a NULL buffer or an out-of-range
 * request, -errno if the write fails, or -ERANGE if fewer than @len
 * bytes were written.
 */
int dev_write(struct erofs_sb_info *sbi, const void *buf, u64 offset, size_t len)
{
	ssize_t ret;	/* full-width result: an int would truncate large writes */

	if (cfg.c_dry_run)
		return 0;

	if (!buf) {
		erofs_err("buf is NULL");
		return -EINVAL;
	}

	/* written so that "offset + len" is never computed and cannot wrap */
	if (offset >= sbi->devsz || len > sbi->devsz ||
	    offset > sbi->devsz - len) {
		erofs_err("Write posion[%" PRIu64 ", %zu] is too large beyond the end of device(%" PRIu64 ").",
			  offset, len, sbi->devsz);
		return -EINVAL;
	}

#ifdef HAVE_PWRITE64
	ret = pwrite64(sbi->devfd, buf, len, (off64_t)offset);
#else
	ret = pwrite(sbi->devfd, buf, len, (off_t)offset);
#endif
	if (ret < 0) {
		/* capture errno before logging has a chance to change it */
		int err = -errno;

		erofs_err("Failed to write data into device - %s:[%" PRIu64 ", %zu].",
			  sbi->devname, offset, len);
		return err;
	}

	if ((size_t)ret != len) {
		erofs_err("Writing data into device - %s:[%" PRIu64 ", %zu] - was truncated.",
			  sbi->devname, offset, len);
		return -ERANGE;
	}
	return 0;
}
222
/*
 * Make the byte range [offset, offset + len) of the device read as zero.
 *
 * Nothing is done in dry-run mode.  Unless @padding is set, a hole punch
 * (keeping the file size) is attempted first where fallocate() supports
 * it.  Otherwise, or if punching fails, zeroes are written through
 * dev_write(): one filesystem block at a time, then the remainder.
 *
 * Returns 0 on success or the first error reported by dev_write().
 */
int dev_fillzero(struct erofs_sb_info *sbi, u64 offset, size_t len, bool padding)
{
	static const char zero_block[EROFS_MAX_BLOCK_SIZE] = {0};
	size_t blksz;
	int err;

	if (cfg.c_dry_run)
		return 0;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
	if (!padding) {
		const int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;

		if (fallocate(sbi->devfd, mode, offset, len) >= 0)
			return 0;
	}
#endif
	blksz = erofs_blksiz(sbi);

	/* whole blocks first; the tail (at most one block) follows */
	for (; len > blksz; len -= blksz, offset += blksz) {
		err = dev_write(sbi, zero_block, offset, blksz);
		if (err)
			return err;
	}
	return dev_write(sbi, zero_block, offset, len);
}
245
dev_fsync(struct erofs_sb_info * sbi)246 int dev_fsync(struct erofs_sb_info *sbi)
247 {
248 int ret;
249
250 ret = fsync(sbi->devfd);
251 if (ret) {
252 erofs_err("Could not fsync device!!!");
253 return -EIO;
254 }
255 return 0;
256 }
257
dev_resize(struct erofs_sb_info * sbi,unsigned int blocks)258 int dev_resize(struct erofs_sb_info *sbi, unsigned int blocks)
259 {
260 int ret;
261 struct stat st;
262 u64 length;
263
264 if (cfg.c_dry_run || sbi->devsz != INT64_MAX)
265 return 0;
266
267 ret = fstat(sbi->devfd, &st);
268 if (ret) {
269 erofs_err("failed to fstat.");
270 return -errno;
271 }
272
273 length = (u64)blocks * erofs_blksiz(sbi);
274 if (st.st_size == length)
275 return 0;
276 if (st.st_size > length)
277 return ftruncate(sbi->devfd, length);
278
279 length = length - st.st_size;
280 #if defined(HAVE_FALLOCATE)
281 if (fallocate(sbi->devfd, 0, st.st_size, length) >= 0)
282 return 0;
283 #endif
284 return dev_fillzero(sbi, st.st_size, length, true);
285 }
286
/*
 * Read @len bytes at byte @offset (plus cfg.c_offset) into @buf.
 *
 * @device_id 0 selects the primary device (sbi->devfd); a value N in
 * 1..sbi->nblobs selects blob descriptor sbi->blobfd[N - 1].
 *
 * Short reads are continued until @len bytes have arrived, and reads
 * interrupted by a signal (EINTR) are retried.  If end of file is hit
 * first, the unread remainder of @buf is zero-filled and the call still
 * succeeds.  Nothing is read in dry-run mode.
 *
 * Returns 0 on success, -EINVAL for a NULL buffer, -ENODEV for an
 * invalid @device_id, or -errno if the read fails.
 */
int dev_read(struct erofs_sb_info *sbi, int device_id,
	     void *buf, u64 offset, size_t len)
{
	char *dst = buf;	/* byte cursor; avoids arithmetic on void * */
	ssize_t nread;
	int fd;

	if (cfg.c_dry_run)
		return 0;

	offset += cfg.c_offset;

	if (!buf) {
		erofs_err("buf is NULL");
		return -EINVAL;
	}

	if (!device_id) {
		fd = sbi->devfd;
	} else {
		if (device_id < 0 || device_id > sbi->nblobs) {
			erofs_err("invalid device id %d", device_id);
			return -ENODEV;
		}
		fd = sbi->blobfd[device_id - 1];
	}

	while (len > 0) {
#ifdef HAVE_PREAD64
		nread = pread64(fd, dst, len, (off64_t)offset);
#else
		nread = pread(fd, dst, len, (off_t)offset);
#endif
		if (nread < 0) {
			int err;

			/*
			 * Retry without touching the cursors: applying the
			 * -1 result to offset/len/dst would corrupt them.
			 */
			if (errno == EINTR)
				continue;

			/* capture errno before logging can change it */
			err = -errno;
			erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zu].",
				  sbi->devname, offset, len);
			return err;
		}
		if (!nread) {
			erofs_info("Reach EOF of device - %s:[%" PRIu64 ", %zu].",
				   sbi->devname, offset, len);
			memset(dst, 0, len);
			return 0;
		}
		offset += nread;
		len -= nread;
		dst += nread;
	}
	return 0;
}
336
/*
 * Userspace fallback for copy_file_range(): copy up to @length bytes from
 * @fd_in at *@off_in to @fd_out at *@off_out through a stack buffer,
 * advancing both offsets by the amount actually transferred.
 *
 * Returns the number of bytes copied, which is short when the input hits
 * end of file or when an error occurs after some data was already
 * copied.  Returns -1 when an error occurs before any byte was copied;
 * errno is then left as set by the failing pread()/pwrite().
 */
static ssize_t __erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
				       int fd_out, erofs_off_t *off_out,
				       size_t length)
{
	/*
	 * The chunk size is arbitrary: a trade-off between stack usage,
	 * cache footprint and the number of system calls.
	 */
	char chunk[8192];
	size_t total = 0;

	while (length > 0) {
		const size_t want = min_t(size_t, length, sizeof(chunk));
		ssize_t got, done;

#ifdef HAVE_PREAD64
		got = pread64(fd_in, chunk, want, *off_in);
#else
		got = pread(fd_in, chunk, want, *off_in);
#endif
		/* Input ended before @length bytes were seen. */
		if (got == 0)
			break;
		/* Read error: report progress so far, if there is any. */
		if (got < 0)
			return total > 0 ? (ssize_t)total : -1;
		*off_in += got;

		/* Push the chunk out; pwrite() may need several rounds. */
		for (done = 0; done < got; ) {
			ssize_t put;

#ifdef HAVE_PWRITE64
			put = pwrite64(fd_out, chunk + done, got - done,
				       *off_out);
#else
			put = pwrite(fd_out, chunk + done, got - done,
				     *off_out);
#endif
			if (put < 0) {
				/*
				 * Rewind the input offset over the bytes that
				 * were read but never written, so the caller
				 * can resume from a consistent position.
				 */
				*off_in -= got - done;
				/* Report progress so far, if there is any. */
				if (total + done > 0)
					return total + done;
				return -1;
			}
			done += put;
			*off_out += put;
		}
		total += got;
		length -= got;
	}
	return total;
}
408
/*
 * Copy up to @length bytes from @fd_in at *@off_in to @fd_out at
 * *@off_out, updating both offsets.
 *
 * When built with HAVE_COPY_FILE_RANGE the copy_file_range() system call
 * is tried first; its byte count is returned on success and -errno on
 * failure.  Only ENOSYS and EXDEV failures fall through to the buffered
 * __erofs_copy_file_range() fallback, whose result is returned as is.
 */
ssize_t erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
			      int fd_out, erofs_off_t *off_out,
			      size_t length)
{
#ifdef HAVE_COPY_FILE_RANGE
	off64_t pos_in = *off_in, pos_out = *off_out;
	ssize_t nr;

	nr = copy_file_range(fd_in, &pos_in, fd_out, &pos_out, length, 0);
	/* anything but "not implemented" / "cross-device" is final */
	if (nr >= 0 || (errno != ENOSYS && errno != EXDEV)) {
		if (nr < 0)
			nr = -errno;
		*off_in = pos_in;
		*off_out = pos_out;
		return nr;
	}
#endif
	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
}
431