• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * device DAX engine
3  *
4  * IO engine that reads/writes from files by doing memcpy to/from
5  * a memory mapped region of DAX enabled device.
6  *
7  * Copyright (C) 2016 Intel Corp
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License,
11  * version 2 as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  */
19 
20 /*
21  * device dax engine
22  * IO engine that accesses a DAX device directly to read and write data
23  *
24  * To use:
25  *   ioengine=dev-dax
26  *
27  *   Other relevant settings:
28  *     iodepth=1
29  *     direct=0	   REQUIRED
30  *     filename=/dev/daxN.N
31  *     bs=2m
32  *
33  *     direct should be left to 0. Using dev-dax implies that memory access
34  *     is direct. However, dev-dax does not support O_DIRECT flag by design
35  *     since it is not necessary.
36  *
37  *     bs should adhere to the device dax alignment at a minimum.
38  *
39  * libpmem.so
40  *   By default, the dev-dax engine will let the system find the libpmem.so
41  *   that it uses. You can use an alternative libpmem by setting the
42  *   FIO_PMEM_LIB environment variable to the full path to the desired
43  *   libpmem.so.
44  */
45 
46 #include <stdio.h>
47 #include <limits.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <errno.h>
51 #include <sys/mman.h>
52 #include <sys/stat.h>
53 #include <sys/sysmacros.h>
54 #include <libgen.h>
55 #include <libpmem.h>
56 
57 #include "../fio.h"
58 #include "../verify.h"
59 
/*
 * Upper bound (1 GiB) on the size of a single limited mapping window,
 * modeled after the behavior of the mmap engine.
 */
#define MMAP_TOTAL_SZ	(1 * 1024 * 1024 * 1024UL)
65 
/*
 * Per-file engine state describing the mapping window currently in use.
 */
struct fio_devdax_data {
	/* address returned by mmap(); reset to NULL on mmap failure or unmap */
	void *devdax_ptr;
	/* length that was requested for the mapping */
	size_t devdax_sz;
	/* file offset at which the mapping starts */
	off_t devdax_off;
};
71 
fio_devdax_file(struct thread_data * td,struct fio_file * f,size_t length,off_t off)72 static int fio_devdax_file(struct thread_data *td, struct fio_file *f,
73 			   size_t length, off_t off)
74 {
75 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
76 	int flags = 0;
77 
78 	if (td_rw(td))
79 		flags = PROT_READ | PROT_WRITE;
80 	else if (td_write(td)) {
81 		flags = PROT_WRITE;
82 
83 		if (td->o.verify != VERIFY_NONE)
84 			flags |= PROT_READ;
85 	} else
86 		flags = PROT_READ;
87 
88 	fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
89 	if (fdd->devdax_ptr == MAP_FAILED) {
90 		fdd->devdax_ptr = NULL;
91 		td_verror(td, errno, "mmap");
92 	}
93 
94 	if (td->error && fdd->devdax_ptr)
95 		munmap(fdd->devdax_ptr, length);
96 
97 	return td->error;
98 }
99 
100 /*
101  * Just mmap an appropriate portion, we cannot mmap the full extent
102  */
fio_devdax_prep_limited(struct thread_data * td,struct io_u * io_u)103 static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u)
104 {
105 	struct fio_file *f = io_u->file;
106 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
107 
108 	if (io_u->buflen > f->real_file_size) {
109 		log_err("dev-dax: bs too big for dev-dax engine\n");
110 		return EIO;
111 	}
112 
113 	fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size);
114 	if (fdd->devdax_sz > f->io_size)
115 		fdd->devdax_sz = f->io_size;
116 
117 	fdd->devdax_off = io_u->offset;
118 
119 	return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
120 }
121 
122 /*
123  * Attempt to mmap the entire file
124  */
fio_devdax_prep_full(struct thread_data * td,struct io_u * io_u)125 static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u)
126 {
127 	struct fio_file *f = io_u->file;
128 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
129 	int ret;
130 
131 	if (fio_file_partial_mmap(f))
132 		return EINVAL;
133 
134 	if (io_u->offset != (size_t) io_u->offset ||
135 	    f->io_size != (size_t) f->io_size) {
136 		fio_file_set_partial_mmap(f);
137 		return EINVAL;
138 	}
139 
140 	fdd->devdax_sz = f->io_size;
141 	fdd->devdax_off = 0;
142 
143 	ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
144 	if (ret)
145 		fio_file_set_partial_mmap(f);
146 
147 	return ret;
148 }
149 
fio_devdax_prep(struct thread_data * td,struct io_u * io_u)150 static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u)
151 {
152 	struct fio_file *f = io_u->file;
153 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
154 	int ret;
155 
156 	/*
157 	 * It fits within existing mapping, use it
158 	 */
159 	if (io_u->offset >= fdd->devdax_off &&
160 	    io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz)
161 		goto done;
162 
163 	/*
164 	 * unmap any existing mapping
165 	 */
166 	if (fdd->devdax_ptr) {
167 		if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0)
168 			return errno;
169 		fdd->devdax_ptr = NULL;
170 	}
171 
172 	if (fio_devdax_prep_full(td, io_u)) {
173 		td_clear_error(td);
174 		ret = fio_devdax_prep_limited(td, io_u);
175 		if (ret)
176 			return ret;
177 	}
178 
179 done:
180 	io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off -
181 				f->file_offset;
182 	return 0;
183 }
184 
fio_devdax_queue(struct thread_data * td,struct io_u * io_u)185 static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u)
186 {
187 	fio_ro_check(td, io_u);
188 	io_u->error = 0;
189 
190 	switch (io_u->ddir) {
191 	case DDIR_READ:
192 		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
193 		break;
194 	case DDIR_WRITE:
195 		pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf,
196 				    io_u->xfer_buflen);
197 		break;
198 	case DDIR_SYNC:
199 	case DDIR_DATASYNC:
200 	case DDIR_SYNC_FILE_RANGE:
201 		break;
202 	default:
203 		io_u->error = EINVAL;
204 		break;
205 	}
206 
207 	return FIO_Q_COMPLETED;
208 }
209 
fio_devdax_init(struct thread_data * td)210 static int fio_devdax_init(struct thread_data *td)
211 {
212 	struct thread_options *o = &td->o;
213 
214 	if ((o->rw_min_bs & page_mask) &&
215 	    (o->fsync_blocks || o->fdatasync_blocks)) {
216 		log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n",
217 			(unsigned long long) page_size);
218 		return 1;
219 	}
220 
221 	return 0;
222 }
223 
fio_devdax_open_file(struct thread_data * td,struct fio_file * f)224 static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f)
225 {
226 	struct fio_devdax_data *fdd;
227 	int ret;
228 
229 	ret = generic_open_file(td, f);
230 	if (ret)
231 		return ret;
232 
233 	fdd = calloc(1, sizeof(*fdd));
234 	if (!fdd) {
235 		int fio_unused __ret;
236 		__ret = generic_close_file(td, f);
237 		return 1;
238 	}
239 
240 	FILE_SET_ENG_DATA(f, fdd);
241 
242 	return 0;
243 }
244 
fio_devdax_close_file(struct thread_data * td,struct fio_file * f)245 static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f)
246 {
247 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
248 
249 	FILE_SET_ENG_DATA(f, NULL);
250 	free(fdd);
251 	fio_file_clear_partial_mmap(f);
252 
253 	return generic_close_file(td, f);
254 }
255 
256 static int
fio_devdax_get_file_size(struct thread_data * td,struct fio_file * f)257 fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f)
258 {
259 	char spath[PATH_MAX];
260 	char npath[PATH_MAX];
261 	char *rpath;
262 	FILE *sfile;
263 	uint64_t size;
264 	struct stat st;
265 	int rc;
266 
267 	if (fio_file_size_known(f))
268 		return 0;
269 
270 	if (f->filetype != FIO_TYPE_CHAR)
271 		return -EINVAL;
272 
273 	rc = stat(f->file_name, &st);
274 	if (rc < 0) {
275 		log_err("%s: failed to stat file %s (%s)\n",
276 			td->o.name, f->file_name, strerror(errno));
277 		return -errno;
278 	}
279 
280 	snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem",
281 		 major(st.st_rdev), minor(st.st_rdev));
282 
283 	rpath = realpath(spath, npath);
284 	if (!rpath) {
285 		log_err("%s: realpath on %s failed (%s)\n",
286 			td->o.name, spath, strerror(errno));
287 		return -errno;
288 	}
289 
290 	/* check if DAX device */
291 	if (strcmp("/sys/class/dax", rpath)) {
292 		log_err("%s: %s not a DAX device!\n",
293 			td->o.name, f->file_name);
294 	}
295 
296 	snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size",
297 		 major(st.st_rdev), minor(st.st_rdev));
298 
299 	sfile = fopen(spath, "r");
300 	if (!sfile) {
301 		log_err("%s: fopen on %s failed (%s)\n",
302 			td->o.name, spath, strerror(errno));
303 		return 1;
304 	}
305 
306 	rc = fscanf(sfile, "%lu", &size);
307 	if (rc < 0) {
308 		log_err("%s: fscanf on %s failed (%s)\n",
309 			td->o.name, spath, strerror(errno));
310 		return 1;
311 	}
312 
313 	f->real_file_size = size;
314 
315 	fclose(sfile);
316 
317 	if (f->file_offset > f->real_file_size) {
318 		log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
319 					(unsigned long long) f->file_offset,
320 					(unsigned long long) f->real_file_size);
321 		return 1;
322 	}
323 
324 	fio_file_set_size_known(f);
325 	return 0;
326 }
327 
328 static struct ioengine_ops ioengine = {
329 	.name		= "dev-dax",
330 	.version	= FIO_IOOPS_VERSION,
331 	.init		= fio_devdax_init,
332 	.prep		= fio_devdax_prep,
333 	.queue		= fio_devdax_queue,
334 	.open_file	= fio_devdax_open_file,
335 	.close_file	= fio_devdax_close_file,
336 	.get_file_size	= fio_devdax_get_file_size,
337 	.flags		= FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
338 };
339 
fio_devdax_register(void)340 static void fio_init fio_devdax_register(void)
341 {
342 	register_ioengine(&ioengine);
343 }
344 
fio_devdax_unregister(void)345 static void fio_exit fio_devdax_unregister(void)
346 {
347 	unregister_ioengine(&ioengine);
348 }
349