1 #include <unistd.h>
2 #include <fcntl.h>
3 #include <string.h>
4 #include <signal.h>
5 #include <time.h>
6 #include <assert.h>
7 
8 #include "fio.h"
9 #include "hash.h"
10 #include "verify.h"
11 #include "trim.h"
12 #include "lib/rand.h"
13 #include "lib/axmap.h"
14 #include "err.h"
15 #include "lib/pow2.h"
16 #include "minmax.h"
17 
18 struct io_completion_data {
19 	int nr;				/* input */
20 
21 	int error;			/* output */
22 	uint64_t bytes_done[DDIR_RWDIR_CNT];	/* output */
23 	struct timeval time;		/* output */
24 };
25 
26 /*
27  * The ->io_axmap contains a map of blocks we have or have not done io
28  * to yet. Used to make sure we cover the entire range in a fair fashion.
29  */
30 static bool random_map_free(struct fio_file *f, const uint64_t block)
31 {
32 	return !axmap_isset(f->io_axmap, block);
33 }
34 
35 /*
36  * Mark a given offset as used in the map.
37  */
38 static void mark_random_map(struct thread_data *td, struct io_u *io_u)
39 {
40 	unsigned int min_bs = td->o.rw_min_bs;
41 	struct fio_file *f = io_u->file;
42 	unsigned int nr_blocks;
43 	uint64_t block;
44 
45 	block = (io_u->offset - f->file_offset) / (uint64_t) min_bs;
46 	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
47 
48 	if (!(io_u->flags & IO_U_F_BUSY_OK))
49 		nr_blocks = axmap_set_nr(f->io_axmap, block, nr_blocks);
50 
51 	if ((nr_blocks * min_bs) < io_u->buflen)
52 		io_u->buflen = nr_blocks * min_bs;
53 }
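/*
 * Worked example (illustrative numbers only): with rw_min_bs = 4096,
 * file_offset = 0, io_u->offset = 12288 and io_u->buflen = 8192, the
 * starting block is 12288 / 4096 = 3 and nr_blocks rounds up to
 * (8192 + 4095) / 4096 = 2, so axmap blocks 3 and 4 get marked.
 */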
54 
55 static uint64_t last_block(struct thread_data *td, struct fio_file *f,
56 			   enum fio_ddir ddir)
57 {
58 	uint64_t max_blocks;
59 	uint64_t max_size;
60 
61 	assert(ddir_rw(ddir));
62 
63 	/*
64 	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
65 	 * -> not for now, since there is code assuming it could go either way.
66 	 */
67 	max_size = f->io_size;
68 	if (max_size > f->real_file_size)
69 		max_size = f->real_file_size;
70 
71 	if (td->o.zone_range)
72 		max_size = td->o.zone_range;
73 
74 	if (td->o.min_bs[ddir] > td->o.ba[ddir])
75 		max_size -= td->o.min_bs[ddir] - td->o.ba[ddir];
76 
77 	max_blocks = max_size / (uint64_t) td->o.ba[ddir];
78 	if (!max_blocks)
79 		return 0;
80 
81 	return max_blocks;
82 }
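/*
 * Example (illustrative): with io_size = 1 MiB, no zone_range, and
 * ba = min_bs = 4096, this returns 1048576 / 4096 = 256, so the random
 * generators below produce block indices in 0..255.
 */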
83 
84 struct rand_off {
85 	struct flist_head list;
86 	uint64_t off;
87 };
88 
89 static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
90 				  enum fio_ddir ddir, uint64_t *b,
91 				  uint64_t lastb)
92 {
93 	uint64_t r;
94 
95 	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE ||
96 	    td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE64) {
97 
98 		r = __rand(&td->random_state);
99 
100 		dprint(FD_RANDOM, "off rand %llu\n", (unsigned long long) r);
101 
102 		*b = lastb * (r / (rand_max(&td->random_state) + 1.0));
103 	} else {
104 		uint64_t off = 0;
105 
106 		assert(fio_file_lfsr(f));
107 
108 		if (lfsr_next(&f->lfsr, &off))
109 			return 1;
110 
111 		*b = off;
112 	}
113 
114 	/*
115 	 * if we are not maintaining a random map, we are done.
116 	 */
117 	if (!file_randommap(td, f))
118 		goto ret;
119 
120 	/*
121 	 * calculate map offset and check if it's free
122 	 */
123 	if (random_map_free(f, *b))
124 		goto ret;
125 
126 	dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
127 						(unsigned long long) *b);
128 
129 	*b = axmap_next_free(f->io_axmap, *b);
130 	if (*b == (uint64_t) -1ULL)
131 		return 1;
132 ret:
133 	return 0;
134 }
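/*
 * Note: the Tausworthe branch scales the raw value r into [0, lastb)
 * via lastb * (r / (rand_max + 1.0)), so lastb itself is never
 * returned. The LFSR branch walks its own full-period sequence, which
 * is intended to touch each block once per pass.
 */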
135 
136 static int __get_next_rand_offset_zipf(struct thread_data *td,
137 				       struct fio_file *f, enum fio_ddir ddir,
138 				       uint64_t *b)
139 {
140 	*b = zipf_next(&f->zipf);
141 	return 0;
142 }
143 
144 static int __get_next_rand_offset_pareto(struct thread_data *td,
145 					 struct fio_file *f, enum fio_ddir ddir,
146 					 uint64_t *b)
147 {
148 	*b = pareto_next(&f->zipf);
149 	return 0;
150 }
151 
152 static int __get_next_rand_offset_gauss(struct thread_data *td,
153 					struct fio_file *f, enum fio_ddir ddir,
154 					uint64_t *b)
155 {
156 	*b = gauss_next(&f->gauss);
157 	return 0;
158 }
159 
160 static int __get_next_rand_offset_zoned(struct thread_data *td,
161 					struct fio_file *f, enum fio_ddir ddir,
162 					uint64_t *b)
163 {
164 	unsigned int v, send, stotal;
165 	uint64_t offset, lastb;
166 	static int warned;
167 	struct zone_split_index *zsi;
168 
169 	lastb = last_block(td, f, ddir);
170 	if (!lastb)
171 		return 1;
172 
173 	if (!td->o.zone_split_nr[ddir]) {
174 bail:
175 		return __get_next_rand_offset(td, f, ddir, b, lastb);
176 	}
177 
178 	/*
179 	 * Generate a value, v, between 1 and 100, both inclusive
180 	 */
181 	v = rand32_between(&td->zone_state, 1, 100);
182 
183 	zsi = &td->zone_state_index[ddir][v - 1];
184 	stotal = zsi->size_perc_prev;
185 	send = zsi->size_perc;
186 
187 	/*
188 	 * Should never happen
189 	 */
190 	if (send == -1U) {
191 		if (!warned) {
192 			log_err("fio: bug in zoned generation\n");
193 			warned = 1;
194 		}
195 		goto bail;
196 	}
197 
198 	/*
199 	 * 'send' is some percentage below or equal to 100 that
200 	 * marks the end of the current IO range. 'stotal' marks
201 	 * the start, in percent.
202 	 */
203 	if (stotal)
204 		offset = stotal * lastb / 100ULL;
205 	else
206 		offset = 0;
207 
208 	lastb = lastb * (send - stotal) / 100ULL;
209 
210 	/*
211 	 * Generate index from 0..send-of-lastb
212 	 */
213 	if (__get_next_rand_offset(td, f, ddir, b, lastb) == 1)
214 		return 1;
215 
216 	/*
217 	 * Add our start offset, if any
218 	 */
219 	if (offset)
220 		*b += offset;
221 
222 	return 0;
223 }
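/*
 * Illustration (assuming the documented zoned option syntax): with
 * random_distribution=zoned:60/10:30/20:10/70, the per-percent
 * zone_state_index table maps v = 1..60 to the first 10% of the range,
 * 61..90 to the next 20%, and 91..100 to the remaining 70%; the chosen
 * slice is then handed to __get_next_rand_offset() with lastb scaled
 * down to that slice and the slice start added back afterwards.
 */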
224 
225 static int flist_cmp(void *data, struct flist_head *a, struct flist_head *b)
226 {
227 	struct rand_off *r1 = flist_entry(a, struct rand_off, list);
228 	struct rand_off *r2 = flist_entry(b, struct rand_off, list);
229 
230 	return r1->off - r2->off;
231 }
232 
233 static int get_off_from_method(struct thread_data *td, struct fio_file *f,
234 			       enum fio_ddir ddir, uint64_t *b)
235 {
236 	if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) {
237 		uint64_t lastb;
238 
239 		lastb = last_block(td, f, ddir);
240 		if (!lastb)
241 			return 1;
242 
243 		return __get_next_rand_offset(td, f, ddir, b, lastb);
244 	} else if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
245 		return __get_next_rand_offset_zipf(td, f, ddir, b);
246 	else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
247 		return __get_next_rand_offset_pareto(td, f, ddir, b);
248 	else if (td->o.random_distribution == FIO_RAND_DIST_GAUSS)
249 		return __get_next_rand_offset_gauss(td, f, ddir, b);
250 	else if (td->o.random_distribution == FIO_RAND_DIST_ZONED)
251 		return __get_next_rand_offset_zoned(td, f, ddir, b);
252 
253 	log_err("fio: unknown random distribution: %d\n", td->o.random_distribution);
254 	return 1;
255 }
256 
257 /*
258  * Sort the reads for a verify phase in batches of verifysort_nr, if
259  * specified.
260  */
261 static inline bool should_sort_io(struct thread_data *td)
262 {
263 	if (!td->o.verifysort_nr || !td->o.do_verify)
264 		return false;
265 	if (!td_random(td))
266 		return false;
267 	if (td->runstate != TD_VERIFYING)
268 		return false;
269 	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE ||
270 	    td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE64)
271 		return false;
272 
273 	return true;
274 }
275 
276 static bool should_do_random(struct thread_data *td, enum fio_ddir ddir)
277 {
278 	unsigned int v;
279 
280 	if (td->o.perc_rand[ddir] == 100)
281 		return true;
282 
283 	v = rand32_between(&td->seq_rand_state[ddir], 1, 100);
284 
285 	return v <= td->o.perc_rand[ddir];
286 }
287 
288 static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
289 				enum fio_ddir ddir, uint64_t *b)
290 {
291 	struct rand_off *r;
292 	int i, ret = 1;
293 
294 	if (!should_sort_io(td))
295 		return get_off_from_method(td, f, ddir, b);
296 
297 	if (!flist_empty(&td->next_rand_list)) {
298 fetch:
299 		r = flist_first_entry(&td->next_rand_list, struct rand_off, list);
300 		flist_del(&r->list);
301 		*b = r->off;
302 		free(r);
303 		return 0;
304 	}
305 
306 	for (i = 0; i < td->o.verifysort_nr; i++) {
307 		r = malloc(sizeof(*r));
308 
309 		ret = get_off_from_method(td, f, ddir, &r->off);
310 		if (ret) {
311 			free(r);
312 			break;
313 		}
314 
315 		flist_add(&r->list, &td->next_rand_list);
316 	}
317 
318 	if (ret && !i)
319 		return ret;
320 
321 	assert(!flist_empty(&td->next_rand_list));
322 	flist_sort(NULL, &td->next_rand_list, flist_cmp);
323 	goto fetch;
324 }
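/*
 * With verify sorting enabled, up to verifysort_nr offsets are
 * generated ahead of time into next_rand_list, sorted by offset via
 * flist_cmp(), and then consumed in ascending order, so the verify
 * reads become mostly sequential while still covering the same blocks.
 */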
325 
326 static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
327 			       enum fio_ddir ddir, uint64_t *b)
328 {
329 	if (!get_next_rand_offset(td, f, ddir, b))
330 		return 0;
331 
332 	if (td->o.time_based ||
333 	    (td->o.file_service_type & __FIO_FSERVICE_NONUNIFORM)) {
334 		fio_file_reset(td, f);
335 		if (!get_next_rand_offset(td, f, ddir, b))
336 			return 0;
337 	}
338 
339 	dprint(FD_IO, "%s: rand offset failed, last=%llu, size=%llu\n",
340 			f->file_name, (unsigned long long) f->last_pos[ddir],
341 			(unsigned long long) f->real_file_size);
342 	return 1;
343 }
344 
345 static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
346 			       enum fio_ddir ddir, uint64_t *offset)
347 {
348 	struct thread_options *o = &td->o;
349 
350 	assert(ddir_rw(ddir));
351 
352 	if (f->last_pos[ddir] >= f->io_size + get_start_offset(td, f) &&
353 	    o->time_based) {
354 		struct thread_options *o = &td->o;
355 		uint64_t io_size = f->io_size + (f->io_size % o->min_bs[ddir]);
356 
357 		if (io_size > f->last_pos[ddir])
358 			f->last_pos[ddir] = 0;
359 		else
360 			f->last_pos[ddir] = f->last_pos[ddir] - io_size;
361 	}
362 
363 	if (f->last_pos[ddir] < f->real_file_size) {
364 		uint64_t pos;
365 
366 		if (f->last_pos[ddir] == f->file_offset && o->ddir_seq_add < 0) {
367 			if (f->real_file_size > f->io_size)
368 				f->last_pos[ddir] = f->io_size;
369 			else
370 				f->last_pos[ddir] = f->real_file_size;
371 		}
372 
373 		pos = f->last_pos[ddir] - f->file_offset;
374 		if (pos && o->ddir_seq_add) {
375 			pos += o->ddir_seq_add;
376 
377 			/*
378 			 * If we reach beyond the end of the file
379 			 * with holed IO, wrap around to the
380 			 * beginning again. If we're doing backwards IO,
381 			 * wrap to the end.
382 			 */
383 			if (pos >= f->real_file_size) {
384 				if (o->ddir_seq_add > 0)
385 					pos = f->file_offset;
386 				else {
387 					if (f->real_file_size > f->io_size)
388 						pos = f->io_size;
389 					else
390 						pos = f->real_file_size;
391 
392 					pos += o->ddir_seq_add;
393 				}
394 			}
395 		}
396 
397 		*offset = pos;
398 		return 0;
399 	}
400 
401 	return 1;
402 }
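/*
 * Example (illustrative): with a positive ddir_seq_add (holed
 * sequential I/O), each new offset is the end of the previous I/O plus
 * ddir_seq_add; once the position runs past real_file_size it wraps to
 * the file start. With a negative ddir_seq_add (backwards I/O) the
 * wrap goes to the end of the file instead, as handled above.
 */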
403 
404 static int get_next_block(struct thread_data *td, struct io_u *io_u,
405 			  enum fio_ddir ddir, int rw_seq,
406 			  unsigned int *is_random)
407 {
408 	struct fio_file *f = io_u->file;
409 	uint64_t b, offset;
410 	int ret;
411 
412 	assert(ddir_rw(ddir));
413 
414 	b = offset = -1ULL;
415 
416 	if (rw_seq) {
417 		if (td_random(td)) {
418 			if (should_do_random(td, ddir)) {
419 				ret = get_next_rand_block(td, f, ddir, &b);
420 				*is_random = 1;
421 			} else {
422 				*is_random = 0;
423 				io_u_set(td, io_u, IO_U_F_BUSY_OK);
424 				ret = get_next_seq_offset(td, f, ddir, &offset);
425 				if (ret)
426 					ret = get_next_rand_block(td, f, ddir, &b);
427 			}
428 		} else {
429 			*is_random = 0;
430 			ret = get_next_seq_offset(td, f, ddir, &offset);
431 		}
432 	} else {
433 		io_u_set(td, io_u, IO_U_F_BUSY_OK);
434 		*is_random = 0;
435 
436 		if (td->o.rw_seq == RW_SEQ_SEQ) {
437 			ret = get_next_seq_offset(td, f, ddir, &offset);
438 			if (ret) {
439 				ret = get_next_rand_block(td, f, ddir, &b);
440 				*is_random = 0;
441 			}
442 		} else if (td->o.rw_seq == RW_SEQ_IDENT) {
443 			if (f->last_start[ddir] != -1ULL)
444 				offset = f->last_start[ddir] - f->file_offset;
445 			else
446 				offset = 0;
447 			ret = 0;
448 		} else {
449 			log_err("fio: unknown rw_seq=%d\n", td->o.rw_seq);
450 			ret = 1;
451 		}
452 	}
453 
454 	if (!ret) {
455 		if (offset != -1ULL)
456 			io_u->offset = offset;
457 		else if (b != -1ULL)
458 			io_u->offset = b * td->o.ba[ddir];
459 		else {
460 			log_err("fio: bug in offset generation: offset=%llu, b=%llu\n", (unsigned long long) offset, (unsigned long long) b);
461 			ret = 1;
462 		}
463 	}
464 
465 	return ret;
466 }
467 
468 /*
469  * For random io, generate a random new block and see if it's used. Repeat
470  * until we find a free one. For sequential io, just return the end of
471  * the last io issued.
472  */
473 static int __get_next_offset(struct thread_data *td, struct io_u *io_u,
474 			     unsigned int *is_random)
475 {
476 	struct fio_file *f = io_u->file;
477 	enum fio_ddir ddir = io_u->ddir;
478 	int rw_seq_hit = 0;
479 
480 	assert(ddir_rw(ddir));
481 
482 	if (td->o.ddir_seq_nr && !--td->ddir_seq_nr) {
483 		rw_seq_hit = 1;
484 		td->ddir_seq_nr = td->o.ddir_seq_nr;
485 	}
486 
487 	if (get_next_block(td, io_u, ddir, rw_seq_hit, is_random))
488 		return 1;
489 
490 	if (io_u->offset >= f->io_size) {
491 		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
492 					(unsigned long long) io_u->offset,
493 					(unsigned long long) f->io_size);
494 		return 1;
495 	}
496 
497 	io_u->offset += f->file_offset;
498 	if (io_u->offset >= f->real_file_size) {
499 		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
500 					(unsigned long long) io_u->offset,
501 					(unsigned long long) f->real_file_size);
502 		return 1;
503 	}
504 
505 	return 0;
506 }
507 
508 static int get_next_offset(struct thread_data *td, struct io_u *io_u,
509 			   unsigned int *is_random)
510 {
511 	if (td->flags & TD_F_PROFILE_OPS) {
512 		struct prof_io_ops *ops = &td->prof_io_ops;
513 
514 		if (ops->fill_io_u_off)
515 			return ops->fill_io_u_off(td, io_u, is_random);
516 	}
517 
518 	return __get_next_offset(td, io_u, is_random);
519 }
520 
521 static inline bool io_u_fits(struct thread_data *td, struct io_u *io_u,
522 			     unsigned int buflen)
523 {
524 	struct fio_file *f = io_u->file;
525 
526 	return io_u->offset + buflen <= f->io_size + get_start_offset(td, f);
527 }
528 
529 static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u,
530 				      unsigned int is_random)
531 {
532 	int ddir = io_u->ddir;
533 	unsigned int buflen = 0;
534 	unsigned int minbs, maxbs;
535 	uint64_t frand_max, r;
536 	bool power_2;
537 
538 	assert(ddir_rw(ddir));
539 
540 	if (td->o.bs_is_seq_rand)
541 		ddir = is_random ? DDIR_WRITE: DDIR_READ;
542 
543 	minbs = td->o.min_bs[ddir];
544 	maxbs = td->o.max_bs[ddir];
545 
546 	if (minbs == maxbs)
547 		return minbs;
548 
549 	/*
550 	 * If we can't satisfy the min block size from here, then fail
551 	 */
552 	if (!io_u_fits(td, io_u, minbs))
553 		return 0;
554 
555 	frand_max = rand_max(&td->bsrange_state);
556 	do {
557 		r = __rand(&td->bsrange_state);
558 
559 		if (!td->o.bssplit_nr[ddir]) {
560 			buflen = 1 + (unsigned int) ((double) maxbs *
561 					(r / (frand_max + 1.0)));
562 			if (buflen < minbs)
563 				buflen = minbs;
564 		} else {
565 			long long perc = 0;
566 			unsigned int i;
567 
568 			for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
569 				struct bssplit *bsp = &td->o.bssplit[ddir][i];
570 
571 				buflen = bsp->bs;
572 				perc += bsp->perc;
573 				if (!perc)
574 					break;
575 				if ((r / perc <= frand_max / 100ULL) &&
576 				    io_u_fits(td, io_u, buflen))
577 					break;
578 			}
579 		}
580 
581 		power_2 = is_power_of_2(minbs);
582 		if (!td->o.bs_unaligned && power_2)
583 			buflen &= ~(minbs - 1);
584 		else if (!td->o.bs_unaligned && !power_2)
585 			buflen -= buflen % minbs;
586 	} while (!io_u_fits(td, io_u, buflen));
587 
588 	return buflen;
589 }
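/*
 * Example (assuming the documented option syntax): with
 * bssplit=4k/60:64k/40, roughly 60% of the generated buflens come out
 * as 4k and 40% as 64k, subject to the io_u_fits() check. Without
 * bssplit, buflen is drawn roughly uniformly from [min_bs, max_bs] and
 * then aligned down to min_bs unless blocksize_unaligned is set.
 */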
590 
591 static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u,
592 				    unsigned int is_random)
593 {
594 	if (td->flags & TD_F_PROFILE_OPS) {
595 		struct prof_io_ops *ops = &td->prof_io_ops;
596 
597 		if (ops->fill_io_u_size)
598 			return ops->fill_io_u_size(td, io_u, is_random);
599 	}
600 
601 	return __get_next_buflen(td, io_u, is_random);
602 }
603 
604 static void set_rwmix_bytes(struct thread_data *td)
605 {
606 	unsigned int diff;
607 
608 	/*
609 	 * we do time or byte based switch. this is needed because
610 	 * buffered writes may issue a lot quicker than they complete,
611 	 * whereas reads do not.
612 	 */
613 	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
614 	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
615 }
616 
617 static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
618 {
619 	unsigned int v;
620 
621 	v = rand32_between(&td->rwmix_state, 1, 100);
622 
623 	if (v <= td->o.rwmix[DDIR_READ])
624 		return DDIR_READ;
625 
626 	return DDIR_WRITE;
627 }
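/*
 * Example: with rwmixread=70 (o->rwmix[DDIR_READ] == 70), a draw of
 * v in 1..70 selects a read and 71..100 selects a write, giving the
 * requested 70/30 mix on average.
 */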
628 
629 int io_u_quiesce(struct thread_data *td)
630 {
631 	int completed = 0;
632 
633 	/*
634 	 * We are going to sleep, ensure that we flush anything pending so
635 	 * as not to skew our latency numbers.
636 	 *
637 	 * Changed to only monitor 'in flight' requests here instead of the
638 	 * td->cur_depth, b/c td->cur_depth does not accurately represent
639 	 * io's that have been actually submitted to an async engine,
640 	 * and cur_depth is meaningless for sync engines.
641 	 */
642 	if (td->io_u_queued || td->cur_depth) {
643 		int fio_unused ret;
644 
645 		ret = td_io_commit(td);
646 	}
647 
648 	while (td->io_u_in_flight) {
649 		int ret;
650 
651 		ret = io_u_queued_complete(td, 1);
652 		if (ret > 0)
653 			completed += ret;
654 	}
655 
656 	if (td->flags & TD_F_REGROW_LOGS)
657 		regrow_logs(td);
658 
659 	return completed;
660 }
661 
662 static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
663 {
664 	enum fio_ddir odir = ddir ^ 1;
665 	long usec;
666 	uint64_t now;
667 
668 	assert(ddir_rw(ddir));
669 	now = utime_since_now(&td->start);
670 
671 	/*
672 	 * if rate_next_io_time is in the past, need to catch up to rate
673 	 */
674 	if (td->rate_next_io_time[ddir] <= now)
675 		return ddir;
676 
677 	/*
678 	 * We are ahead of rate in this direction. See if we
679 	 * should switch.
680 	 */
681 	if (td_rw(td) && td->o.rwmix[odir]) {
682 		/*
683 		 * Other direction is behind rate, switch
684 		 */
685 		if (td->rate_next_io_time[odir] <= now)
686 			return odir;
687 
688 		/*
689 		 * Both directions are ahead of rate. sleep the min
690 		 * switch if necissary
691 		 */
692 		if (td->rate_next_io_time[ddir] <=
693 			td->rate_next_io_time[odir]) {
694 			usec = td->rate_next_io_time[ddir] - now;
695 		} else {
696 			usec = td->rate_next_io_time[odir] - now;
697 			ddir = odir;
698 		}
699 	} else
700 		usec = td->rate_next_io_time[ddir] - now;
701 
702 	if (td->o.io_submit_mode == IO_MODE_INLINE)
703 		io_u_quiesce(td);
704 
705 	usec = usec_sleep(td, usec);
706 
707 	return ddir;
708 }
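/*
 * Summary: if the preferred direction is already ahead of its rate,
 * either switch to the other direction (when that one is behind) or
 * sleep until the earlier of the two next-I/O times, quiescing pending
 * I/O first in inline submit mode so the sleep does not distort the
 * latency numbers.
 */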
709 
710 /*
711  * Return the data direction for the next io_u. If the job is a
712  * mixed read/write workload, check the rwmix cycle and switch if
713  * necessary.
714  */
715 static enum fio_ddir get_rw_ddir(struct thread_data *td)
716 {
717 	enum fio_ddir ddir;
718 
719 	/*
720 	 * See if it's time to fsync/fdatasync/sync_file_range first,
721 	 * and if not then move on to check regular I/Os.
722 	 */
723 	if (should_fsync(td)) {
724 		if (td->o.fsync_blocks && td->io_issues[DDIR_WRITE] &&
725 		    !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks))
726 			return DDIR_SYNC;
727 
728 		if (td->o.fdatasync_blocks && td->io_issues[DDIR_WRITE] &&
729 		    !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks))
730 			return DDIR_DATASYNC;
731 
732 		if (td->sync_file_range_nr && td->io_issues[DDIR_WRITE] &&
733 		    !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr))
734 			return DDIR_SYNC_FILE_RANGE;
735 	}
736 
737 	if (td_rw(td)) {
738 		/*
739 		 * Check if it's time to seed a new data direction.
740 		 */
741 		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
742 			/*
743 			 * Put a top limit on how many bytes we do for
744 			 * one data direction, to avoid overflowing the
745 			 * ranges too much
746 			 */
747 			ddir = get_rand_ddir(td);
748 
749 			if (ddir != td->rwmix_ddir)
750 				set_rwmix_bytes(td);
751 
752 			td->rwmix_ddir = ddir;
753 		}
754 		ddir = td->rwmix_ddir;
755 	} else if (td_read(td))
756 		ddir = DDIR_READ;
757 	else if (td_write(td))
758 		ddir = DDIR_WRITE;
759 	else if (td_trim(td))
760 		ddir = DDIR_TRIM;
761 	else
762 		ddir = DDIR_INVAL;
763 
764 	td->rwmix_ddir = rate_ddir(td, ddir);
765 	return td->rwmix_ddir;
766 }
767 
768 static void set_rw_ddir(struct thread_data *td, struct io_u *io_u)
769 {
770 	enum fio_ddir ddir = get_rw_ddir(td);
771 
772 	if (td_trimwrite(td)) {
773 		struct fio_file *f = io_u->file;
774 		if (f->last_pos[DDIR_WRITE] == f->last_pos[DDIR_TRIM])
775 			ddir = DDIR_TRIM;
776 		else
777 			ddir = DDIR_WRITE;
778 	}
779 
780 	io_u->ddir = io_u->acct_ddir = ddir;
781 
782 	if (io_u->ddir == DDIR_WRITE && td_ioengine_flagged(td, FIO_BARRIER) &&
783 	    td->o.barrier_blocks &&
784 	   !(td->io_issues[DDIR_WRITE] % td->o.barrier_blocks) &&
785 	     td->io_issues[DDIR_WRITE])
786 		io_u_set(td, io_u, IO_U_F_BARRIER);
787 }
788 
789 void put_file_log(struct thread_data *td, struct fio_file *f)
790 {
791 	unsigned int ret = put_file(td, f);
792 
793 	if (ret)
794 		td_verror(td, ret, "file close");
795 }
796 
797 void put_io_u(struct thread_data *td, struct io_u *io_u)
798 {
799 	if (td->parent)
800 		td = td->parent;
801 
802 	td_io_u_lock(td);
803 
804 	if (io_u->file && !(io_u->flags & IO_U_F_NO_FILE_PUT))
805 		put_file_log(td, io_u->file);
806 
807 	io_u->file = NULL;
808 	io_u_set(td, io_u, IO_U_F_FREE);
809 
810 	if (io_u->flags & IO_U_F_IN_CUR_DEPTH) {
811 		td->cur_depth--;
812 		assert(!(td->flags & TD_F_CHILD));
813 	}
814 	io_u_qpush(&td->io_u_freelist, io_u);
815 	td_io_u_unlock(td);
816 	td_io_u_free_notify(td);
817 }
818 
819 void clear_io_u(struct thread_data *td, struct io_u *io_u)
820 {
821 	io_u_clear(td, io_u, IO_U_F_FLIGHT);
822 	put_io_u(td, io_u);
823 }
824 
825 void requeue_io_u(struct thread_data *td, struct io_u **io_u)
826 {
827 	struct io_u *__io_u = *io_u;
828 	enum fio_ddir ddir = acct_ddir(__io_u);
829 
830 	dprint(FD_IO, "requeue %p\n", __io_u);
831 
832 	if (td->parent)
833 		td = td->parent;
834 
835 	td_io_u_lock(td);
836 
837 	io_u_set(td, __io_u, IO_U_F_FREE);
838 	if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(ddir))
839 		td->io_issues[ddir]--;
840 
841 	io_u_clear(td, __io_u, IO_U_F_FLIGHT);
842 	if (__io_u->flags & IO_U_F_IN_CUR_DEPTH) {
843 		td->cur_depth--;
844 		assert(!(td->flags & TD_F_CHILD));
845 	}
846 
847 	io_u_rpush(&td->io_u_requeues, __io_u);
848 	td_io_u_unlock(td);
849 	td_io_u_free_notify(td);
850 	*io_u = NULL;
851 }
852 
853 static int fill_io_u(struct thread_data *td, struct io_u *io_u)
854 {
855 	unsigned int is_random;
856 
857 	if (td_ioengine_flagged(td, FIO_NOIO))
858 		goto out;
859 
860 	set_rw_ddir(td, io_u);
861 
862 	/*
863 	 * fsync() or fdatasync() or trim etc, we are done
864 	 */
865 	if (!ddir_rw(io_u->ddir))
866 		goto out;
867 
868 	/*
869 	 * See if it's time to switch to a new zone
870 	 */
871 	if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) {
872 		struct fio_file *f = io_u->file;
873 
874 		td->zone_bytes = 0;
875 		f->file_offset += td->o.zone_range + td->o.zone_skip;
876 
877 		/*
878 		 * Wrap from the beginning, if we exceed the file size
879 		 */
880 		if (f->file_offset >= f->real_file_size)
881 			f->file_offset = f->real_file_size - f->file_offset;
882 		f->last_pos[io_u->ddir] = f->file_offset;
883 		td->io_skip_bytes += td->o.zone_skip;
884 	}
885 
886 	/*
887 	 * No log, let the seq/rand engine retrieve the next buflen and
888 	 * position.
889 	 */
890 	if (get_next_offset(td, io_u, &is_random)) {
891 		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
892 		return 1;
893 	}
894 
895 	io_u->buflen = get_next_buflen(td, io_u, is_random);
896 	if (!io_u->buflen) {
897 		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
898 		return 1;
899 	}
900 
901 	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
902 		dprint(FD_IO, "io_u %p, offset + buflen exceeds file size\n",
903 			io_u);
904 		dprint(FD_IO, "  offset=%llu/buflen=%lu > %llu\n",
905 			(unsigned long long) io_u->offset, io_u->buflen,
906 			(unsigned long long) io_u->file->real_file_size);
907 		return 1;
908 	}
909 
910 	/*
911 	 * mark entry before potentially trimming io_u
912 	 */
913 	if (td_random(td) && file_randommap(td, io_u->file))
914 		mark_random_map(td, io_u);
915 
916 out:
917 	dprint_io_u(io_u, "fill_io_u");
918 	td->zone_bytes += io_u->buflen;
919 	return 0;
920 }
921 
922 static void __io_u_mark_map(unsigned int *map, unsigned int nr)
923 {
924 	int idx = 0;
925 
926 	switch (nr) {
927 	default:
928 		idx = 6;
929 		break;
930 	case 33 ... 64:
931 		idx = 5;
932 		break;
933 	case 17 ... 32:
934 		idx = 4;
935 		break;
936 	case 9 ... 16:
937 		idx = 3;
938 		break;
939 	case 5 ... 8:
940 		idx = 2;
941 		break;
942 	case 1 ... 4:
943 		idx = 1;
944 	case 0:
945 		break;
946 	}
947 
948 	map[idx]++;
949 }
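/*
 * Note: the buckets count 0, 1-4, 5-8, 9-16, 17-32, 33-64 and >64
 * events per call; the fall-through from "case 1 ... 4" into "case 0"
 * is intentional, a count of zero simply stays in bucket 0.
 */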
950 
951 void io_u_mark_submit(struct thread_data *td, unsigned int nr)
952 {
953 	__io_u_mark_map(td->ts.io_u_submit, nr);
954 	td->ts.total_submit++;
955 }
956 
957 void io_u_mark_complete(struct thread_data *td, unsigned int nr)
958 {
959 	__io_u_mark_map(td->ts.io_u_complete, nr);
960 	td->ts.total_complete++;
961 }
962 
963 void io_u_mark_depth(struct thread_data *td, unsigned int nr)
964 {
965 	int idx = 0;
966 
967 	switch (td->cur_depth) {
968 	default:
969 		idx = 6;
970 		break;
971 	case 32 ... 63:
972 		idx = 5;
973 		break;
974 	case 16 ... 31:
975 		idx = 4;
976 		break;
977 	case 8 ... 15:
978 		idx = 3;
979 		break;
980 	case 4 ... 7:
981 		idx = 2;
982 		break;
983 	case 2 ... 3:
984 		idx = 1;
985 	case 1:
986 		break;
987 	}
988 
989 	td->ts.io_u_map[idx] += nr;
990 }
991 
992 static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
993 {
994 	int idx = 0;
995 
996 	assert(usec < 1000);
997 
998 	switch (usec) {
999 	case 750 ... 999:
1000 		idx = 9;
1001 		break;
1002 	case 500 ... 749:
1003 		idx = 8;
1004 		break;
1005 	case 250 ... 499:
1006 		idx = 7;
1007 		break;
1008 	case 100 ... 249:
1009 		idx = 6;
1010 		break;
1011 	case 50 ... 99:
1012 		idx = 5;
1013 		break;
1014 	case 20 ... 49:
1015 		idx = 4;
1016 		break;
1017 	case 10 ... 19:
1018 		idx = 3;
1019 		break;
1020 	case 4 ... 9:
1021 		idx = 2;
1022 		break;
1023 	case 2 ... 3:
1024 		idx = 1;
1025 	case 0 ... 1:
1026 		break;
1027 	}
1028 
1029 	assert(idx < FIO_IO_U_LAT_U_NR);
1030 	td->ts.io_u_lat_u[idx]++;
1031 }
1032 
1033 static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
1034 {
1035 	int idx = 0;
1036 
1037 	switch (msec) {
1038 	default:
1039 		idx = 11;
1040 		break;
1041 	case 1000 ... 1999:
1042 		idx = 10;
1043 		break;
1044 	case 750 ... 999:
1045 		idx = 9;
1046 		break;
1047 	case 500 ... 749:
1048 		idx = 8;
1049 		break;
1050 	case 250 ... 499:
1051 		idx = 7;
1052 		break;
1053 	case 100 ... 249:
1054 		idx = 6;
1055 		break;
1056 	case 50 ... 99:
1057 		idx = 5;
1058 		break;
1059 	case 20 ... 49:
1060 		idx = 4;
1061 		break;
1062 	case 10 ... 19:
1063 		idx = 3;
1064 		break;
1065 	case 4 ... 9:
1066 		idx = 2;
1067 		break;
1068 	case 2 ... 3:
1069 		idx = 1;
1070 	case 0 ... 1:
1071 		break;
1072 	}
1073 
1074 	assert(idx < FIO_IO_U_LAT_M_NR);
1075 	td->ts.io_u_lat_m[idx]++;
1076 }
1077 
1078 static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
1079 {
1080 	if (usec < 1000)
1081 		io_u_mark_lat_usec(td, usec);
1082 	else
1083 		io_u_mark_lat_msec(td, usec / 1000);
1084 }
1085 
1086 static unsigned int __get_next_fileno_rand(struct thread_data *td)
1087 {
1088 	unsigned long fileno;
1089 
1090 	if (td->o.file_service_type == FIO_FSERVICE_RANDOM) {
1091 		uint64_t frand_max = rand_max(&td->next_file_state);
1092 		unsigned long r;
1093 
1094 		r = __rand(&td->next_file_state);
1095 		return (unsigned int) ((double) td->o.nr_files
1096 				* (r / (frand_max + 1.0)));
1097 	}
1098 
1099 	if (td->o.file_service_type == FIO_FSERVICE_ZIPF)
1100 		fileno = zipf_next(&td->next_file_zipf);
1101 	else if (td->o.file_service_type == FIO_FSERVICE_PARETO)
1102 		fileno = pareto_next(&td->next_file_zipf);
1103 	else if (td->o.file_service_type == FIO_FSERVICE_GAUSS)
1104 		fileno = gauss_next(&td->next_file_gauss);
1105 	else {
1106 		log_err("fio: bad file service type: %d\n", td->o.file_service_type);
1107 		assert(0);
1108 		return 0;
1109 	}
1110 
1111 	return fileno >> FIO_FSERVICE_SHIFT;
1112 }
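/*
 * Note: the result from the zipf/pareto/gauss file-service generators
 * is shifted down by FIO_FSERVICE_SHIFT to recover a file index, while
 * the plain random case scales the raw value directly into
 * [0, nr_files).
 */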
1113 
1114 /*
1115  * Get next file to service by choosing one at random
1116  */
1117 static struct fio_file *get_next_file_rand(struct thread_data *td,
1118 					   enum fio_file_flags goodf,
1119 					   enum fio_file_flags badf)
1120 {
1121 	struct fio_file *f;
1122 	int fno;
1123 
1124 	do {
1125 		int opened = 0;
1126 
1127 		fno = __get_next_fileno_rand(td);
1128 
1129 		f = td->files[fno];
1130 		if (fio_file_done(f))
1131 			continue;
1132 
1133 		if (!fio_file_open(f)) {
1134 			int err;
1135 
1136 			if (td->nr_open_files >= td->o.open_files)
1137 				return ERR_PTR(-EBUSY);
1138 
1139 			err = td_io_open_file(td, f);
1140 			if (err)
1141 				continue;
1142 			opened = 1;
1143 		}
1144 
1145 		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
1146 			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
1147 			return f;
1148 		}
1149 		if (opened)
1150 			td_io_close_file(td, f);
1151 	} while (1);
1152 }
1153 
1154 /*
1155  * Get next file to service by doing round robin between all available ones
1156  */
1157 static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
1158 					 int badf)
1159 {
1160 	unsigned int old_next_file = td->next_file;
1161 	struct fio_file *f;
1162 
1163 	do {
1164 		int opened = 0;
1165 
1166 		f = td->files[td->next_file];
1167 
1168 		td->next_file++;
1169 		if (td->next_file >= td->o.nr_files)
1170 			td->next_file = 0;
1171 
1172 		dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
1173 		if (fio_file_done(f)) {
1174 			f = NULL;
1175 			continue;
1176 		}
1177 
1178 		if (!fio_file_open(f)) {
1179 			int err;
1180 
1181 			if (td->nr_open_files >= td->o.open_files)
1182 				return ERR_PTR(-EBUSY);
1183 
1184 			err = td_io_open_file(td, f);
1185 			if (err) {
1186 				dprint(FD_FILE, "error %d on open of %s\n",
1187 					err, f->file_name);
1188 				f = NULL;
1189 				continue;
1190 			}
1191 			opened = 1;
1192 		}
1193 
1194 		dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
1195 								f->flags);
1196 		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
1197 			break;
1198 
1199 		if (opened)
1200 			td_io_close_file(td, f);
1201 
1202 		f = NULL;
1203 	} while (td->next_file != old_next_file);
1204 
1205 	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
1206 	return f;
1207 }
1208 
1209 static struct fio_file *__get_next_file(struct thread_data *td)
1210 {
1211 	struct fio_file *f;
1212 
1213 	assert(td->o.nr_files <= td->files_index);
1214 
1215 	if (td->nr_done_files >= td->o.nr_files) {
1216 		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
1217 				" nr_files=%d\n", td->nr_open_files,
1218 						  td->nr_done_files,
1219 						  td->o.nr_files);
1220 		return NULL;
1221 	}
1222 
1223 	f = td->file_service_file;
1224 	if (f && fio_file_open(f) && !fio_file_closing(f)) {
1225 		if (td->o.file_service_type == FIO_FSERVICE_SEQ)
1226 			goto out;
1227 		if (td->file_service_left--)
1228 			goto out;
1229 	}
1230 
1231 	if (td->o.file_service_type == FIO_FSERVICE_RR ||
1232 	    td->o.file_service_type == FIO_FSERVICE_SEQ)
1233 		f = get_next_file_rr(td, FIO_FILE_open, FIO_FILE_closing);
1234 	else
1235 		f = get_next_file_rand(td, FIO_FILE_open, FIO_FILE_closing);
1236 
1237 	if (IS_ERR(f))
1238 		return f;
1239 
1240 	td->file_service_file = f;
1241 	td->file_service_left = td->file_service_nr - 1;
1242 out:
1243 	if (f)
1244 		dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
1245 	else
1246 		dprint(FD_FILE, "get_next_file: NULL\n");
1247 	return f;
1248 }
1249 
1250 static struct fio_file *get_next_file(struct thread_data *td)
1251 {
1252 	if (td->flags & TD_F_PROFILE_OPS) {
1253 		struct prof_io_ops *ops = &td->prof_io_ops;
1254 
1255 		if (ops->get_next_file)
1256 			return ops->get_next_file(td);
1257 	}
1258 
1259 	return __get_next_file(td);
1260 }
1261 
1262 static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
1263 {
1264 	struct fio_file *f;
1265 
1266 	do {
1267 		f = get_next_file(td);
1268 		if (IS_ERR_OR_NULL(f))
1269 			return PTR_ERR(f);
1270 
1271 		io_u->file = f;
1272 		get_file(f);
1273 
1274 		if (!fill_io_u(td, io_u))
1275 			break;
1276 
1277 		put_file_log(td, f);
1278 		td_io_close_file(td, f);
1279 		io_u->file = NULL;
1280 		if (td->o.file_service_type & __FIO_FSERVICE_NONUNIFORM)
1281 			fio_file_reset(td, f);
1282 		else {
1283 			fio_file_set_done(f);
1284 			td->nr_done_files++;
1285 			dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
1286 					td->nr_done_files, td->o.nr_files);
1287 		}
1288 	} while (1);
1289 
1290 	return 0;
1291 }
1292 
1293 static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
1294 		      unsigned long tusec, unsigned long max_usec)
1295 {
1296 	if (!td->error)
1297 		log_err("fio: latency of %lu usec exceeds specified max (%lu usec)\n", tusec, max_usec);
1298 	td_verror(td, ETIMEDOUT, "max latency exceeded");
1299 	icd->error = ETIMEDOUT;
1300 }
1301 
1302 static void lat_new_cycle(struct thread_data *td)
1303 {
1304 	fio_gettime(&td->latency_ts, NULL);
1305 	td->latency_ios = ddir_rw_sum(td->io_blocks);
1306 	td->latency_failed = 0;
1307 }
1308 
1309 /*
1310  * We had an IO outside the latency target. Reduce the queue depth. If we
1311  * are at QD=1, then it's time to give up.
1312  */
1313 static bool __lat_target_failed(struct thread_data *td)
1314 {
1315 	if (td->latency_qd == 1)
1316 		return true;
1317 
1318 	td->latency_qd_high = td->latency_qd;
1319 
1320 	if (td->latency_qd == td->latency_qd_low)
1321 		td->latency_qd_low--;
1322 
1323 	td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;
1324 
1325 	dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
1326 
1327 	/*
1328 	 * When we ramp QD down, quiesce existing IO to prevent
1329 	 * a storm of ramp downs due to pending higher depth.
1330 	 */
1331 	io_u_quiesce(td);
1332 	lat_new_cycle(td);
1333 	return false;
1334 }
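/*
 * Worked example (illustrative numbers): starting from
 * latency_qd_low = 1, latency_qd = 8 and latency_qd_high = 16, a failed
 * window sets qd_high = 8 and bisects the depth down to (8 + 1) / 2 = 4;
 * repeated failures keep halving toward QD=1, at which point the
 * latency target is given up.
 */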
1335 
1336 static bool lat_target_failed(struct thread_data *td)
1337 {
1338 	if (td->o.latency_percentile.u.f == 100.0)
1339 		return __lat_target_failed(td);
1340 
1341 	td->latency_failed++;
1342 	return false;
1343 }
1344 
1345 void lat_target_init(struct thread_data *td)
1346 {
1347 	td->latency_end_run = 0;
1348 
1349 	if (td->o.latency_target) {
1350 		dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
1351 		fio_gettime(&td->latency_ts, NULL);
1352 		td->latency_qd = 1;
1353 		td->latency_qd_high = td->o.iodepth;
1354 		td->latency_qd_low = 1;
1355 		td->latency_ios = ddir_rw_sum(td->io_blocks);
1356 	} else
1357 		td->latency_qd = td->o.iodepth;
1358 }
1359 
1360 void lat_target_reset(struct thread_data *td)
1361 {
1362 	if (!td->latency_end_run)
1363 		lat_target_init(td);
1364 }
1365 
1366 static void lat_target_success(struct thread_data *td)
1367 {
1368 	const unsigned int qd = td->latency_qd;
1369 	struct thread_options *o = &td->o;
1370 
1371 	td->latency_qd_low = td->latency_qd;
1372 
1373 	/*
1374 	 * If we haven't failed yet, we double up to a failing value instead
1375 	 * of bisecting from highest possible queue depth. If we have set
1376 	 * a limit other than td->o.iodepth, bisect between that and the current depth.
1377 	 */
1378 	if (td->latency_qd_high != o->iodepth)
1379 		td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
1380 	else
1381 		td->latency_qd *= 2;
1382 
1383 	if (td->latency_qd > o->iodepth)
1384 		td->latency_qd = o->iodepth;
1385 
1386 	dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
1387 
1388 	/*
1389 	 * Same as last one, we are done. Let it run a latency cycle, so
1390 	 * we get only the results from the targeted depth.
1391 	 */
1392 	if (td->latency_qd == qd) {
1393 		if (td->latency_end_run) {
1394 			dprint(FD_RATE, "We are done\n");
1395 			td->done = 1;
1396 		} else {
1397 			dprint(FD_RATE, "Quiesce and final run\n");
1398 			io_u_quiesce(td);
1399 			td->latency_end_run = 1;
1400 			reset_all_stats(td);
1401 			reset_io_stats(td);
1402 		}
1403 	}
1404 
1405 	lat_new_cycle(td);
1406 }
1407 
1408 /*
1409  * Check if we can bump the queue depth
1410  */
1411 void lat_target_check(struct thread_data *td)
1412 {
1413 	uint64_t usec_window;
1414 	uint64_t ios;
1415 	double success_ios;
1416 
1417 	usec_window = utime_since_now(&td->latency_ts);
1418 	if (usec_window < td->o.latency_window)
1419 		return;
1420 
1421 	ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
1422 	success_ios = (double) (ios - td->latency_failed) / (double) ios;
1423 	success_ios *= 100.0;
1424 
1425 	dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);
1426 
1427 	if (success_ios >= td->o.latency_percentile.u.f)
1428 		lat_target_success(td);
1429 	else
1430 		__lat_target_failed(td);
1431 }
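/*
 * Example (illustrative): with latency_window=5s and
 * latency_percentile=99.0, once at least 5 seconds of samples are in
 * the current window, success_ios is the percentage of I/Os in that
 * window that met latency_target; 99.0% or better ramps the queue
 * depth up, anything less ramps it down.
 */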
1432 
1433 /*
1434  * If latency target is enabled, we might be ramping up or down and not
1435  * using the full queue depth available.
1436  */
1437 bool queue_full(const struct thread_data *td)
1438 {
1439 	const int qempty = io_u_qempty(&td->io_u_freelist);
1440 
1441 	if (qempty)
1442 		return true;
1443 	if (!td->o.latency_target)
1444 		return false;
1445 
1446 	return td->cur_depth >= td->latency_qd;
1447 }
1448 
1449 struct io_u *__get_io_u(struct thread_data *td)
1450 {
1451 	struct io_u *io_u = NULL;
1452 
1453 	if (td->stop_io)
1454 		return NULL;
1455 
1456 	td_io_u_lock(td);
1457 
1458 again:
1459 	if (!io_u_rempty(&td->io_u_requeues))
1460 		io_u = io_u_rpop(&td->io_u_requeues);
1461 	else if (!queue_full(td)) {
1462 		io_u = io_u_qpop(&td->io_u_freelist);
1463 
1464 		io_u->file = NULL;
1465 		io_u->buflen = 0;
1466 		io_u->resid = 0;
1467 		io_u->end_io = NULL;
1468 	}
1469 
1470 	if (io_u) {
1471 		assert(io_u->flags & IO_U_F_FREE);
1472 		io_u_clear(td, io_u, IO_U_F_FREE | IO_U_F_NO_FILE_PUT |
1473 				 IO_U_F_TRIMMED | IO_U_F_BARRIER |
1474 				 IO_U_F_VER_LIST);
1475 
1476 		io_u->error = 0;
1477 		io_u->acct_ddir = -1;
1478 		td->cur_depth++;
1479 		assert(!(td->flags & TD_F_CHILD));
1480 		io_u_set(td, io_u, IO_U_F_IN_CUR_DEPTH);
1481 		io_u->ipo = NULL;
1482 	} else if (td_async_processing(td)) {
1483 		/*
1484 		 * We ran out, wait for async verify threads to finish and
1485 		 * return one
1486 		 */
1487 		assert(!(td->flags & TD_F_CHILD));
1488 		assert(!pthread_cond_wait(&td->free_cond, &td->io_u_lock));
1489 		goto again;
1490 	}
1491 
1492 	td_io_u_unlock(td);
1493 	return io_u;
1494 }
1495 
1496 static bool check_get_trim(struct thread_data *td, struct io_u *io_u)
1497 {
1498 	if (!(td->flags & TD_F_TRIM_BACKLOG))
1499 		return false;
1500 
1501 	if (td->trim_entries) {
1502 		int get_trim = 0;
1503 
1504 		if (td->trim_batch) {
1505 			td->trim_batch--;
1506 			get_trim = 1;
1507 		} else if (!(td->io_hist_len % td->o.trim_backlog) &&
1508 			 td->last_ddir != DDIR_READ) {
1509 			td->trim_batch = td->o.trim_batch;
1510 			if (!td->trim_batch)
1511 				td->trim_batch = td->o.trim_backlog;
1512 			get_trim = 1;
1513 		}
1514 
1515 		if (get_trim && get_next_trim(td, io_u))
1516 			return true;
1517 	}
1518 
1519 	return false;
1520 }
1521 
1522 static bool check_get_verify(struct thread_data *td, struct io_u *io_u)
1523 {
1524 	if (!(td->flags & TD_F_VER_BACKLOG))
1525 		return false;
1526 
1527 	if (td->io_hist_len) {
1528 		int get_verify = 0;
1529 
1530 		if (td->verify_batch)
1531 			get_verify = 1;
1532 		else if (!(td->io_hist_len % td->o.verify_backlog) &&
1533 			 td->last_ddir != DDIR_READ) {
1534 			td->verify_batch = td->o.verify_batch;
1535 			if (!td->verify_batch)
1536 				td->verify_batch = td->o.verify_backlog;
1537 			get_verify = 1;
1538 		}
1539 
1540 		if (get_verify && !get_next_verify(td, io_u)) {
1541 			td->verify_batch--;
1542 			return true;
1543 		}
1544 	}
1545 
1546 	return false;
1547 }
1548 
1549 /*
1550  * Fill offset and start time into the buffer content, to prevent too
1551  * easy compressible data for simple de-dupe attempts. Do this for every
1552  * 512b block in the range, since that should be the smallest block size
1553  * we can expect from a device.
1554  */
1555 static void small_content_scramble(struct io_u *io_u)
1556 {
1557 	unsigned int i, nr_blocks = io_u->buflen / 512;
1558 	uint64_t boffset;
1559 	unsigned int offset;
1560 	void *p, *end;
1561 
1562 	if (!nr_blocks)
1563 		return;
1564 
1565 	p = io_u->xfer_buf;
1566 	boffset = io_u->offset;
1567 	io_u->buf_filled_len = 0;
1568 
1569 	for (i = 0; i < nr_blocks; i++) {
1570 		/*
1571 		 * Fill the byte offset into a "random" start offset of
1572 		 * the buffer, given by the product of the usec time
1573 		 * and the actual offset.
1574 		 */
1575 		offset = (io_u->start_time.tv_usec ^ boffset) & 511;
1576 		offset &= ~(sizeof(uint64_t) - 1);
1577 		if (offset >= 512 - sizeof(uint64_t))
1578 			offset -= sizeof(uint64_t);
1579 		memcpy(p + offset, &boffset, sizeof(boffset));
1580 
1581 		end = p + 512 - sizeof(io_u->start_time);
1582 		memcpy(end, &io_u->start_time, sizeof(io_u->start_time));
1583 		p += 512;
1584 		boffset += 512;
1585 	}
1586 }
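/*
 * Resulting layout per 512-byte block, roughly: an 8-byte copy of the
 * block's byte offset is written at a pseudo-random, 8-byte-aligned
 * position inside the block, and the io_u start time is copied into
 * the final sizeof(struct timeval) bytes, so otherwise identical
 * buffers still differ between blocks and between runs.
 */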
1587 
1588 /*
1589  * Return an io_u to be processed. Gets a buflen and offset, sets direction,
1590  * etc. The returned io_u is fully ready to be prepped and submitted.
1591  */
1592 struct io_u *get_io_u(struct thread_data *td)
1593 {
1594 	struct fio_file *f;
1595 	struct io_u *io_u;
1596 	int do_scramble = 0;
1597 	long ret = 0;
1598 
1599 	io_u = __get_io_u(td);
1600 	if (!io_u) {
1601 		dprint(FD_IO, "__get_io_u failed\n");
1602 		return NULL;
1603 	}
1604 
1605 	if (check_get_verify(td, io_u))
1606 		goto out;
1607 	if (check_get_trim(td, io_u))
1608 		goto out;
1609 
1610 	/*
1611 	 * from a requeue, io_u already setup
1612 	 */
1613 	if (io_u->file)
1614 		goto out;
1615 
1616 	/*
1617 	 * If using an iolog, grab next piece if any available.
1618 	 */
1619 	if (td->flags & TD_F_READ_IOLOG) {
1620 		if (read_iolog_get(td, io_u))
1621 			goto err_put;
1622 	} else if (set_io_u_file(td, io_u)) {
1623 		ret = -EBUSY;
1624 		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
1625 		goto err_put;
1626 	}
1627 
1628 	f = io_u->file;
1629 	if (!f) {
1630 		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
1631 		goto err_put;
1632 	}
1633 
1634 	assert(fio_file_open(f));
1635 
1636 	if (ddir_rw(io_u->ddir)) {
1637 		if (!io_u->buflen && !td_ioengine_flagged(td, FIO_NOIO)) {
1638 			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
1639 			goto err_put;
1640 		}
1641 
1642 		f->last_start[io_u->ddir] = io_u->offset;
1643 		f->last_pos[io_u->ddir] = io_u->offset + io_u->buflen;
1644 
1645 		if (io_u->ddir == DDIR_WRITE) {
1646 			if (td->flags & TD_F_REFILL_BUFFERS) {
1647 				io_u_fill_buffer(td, io_u,
1648 					td->o.min_bs[DDIR_WRITE],
1649 					io_u->buflen);
1650 			} else if ((td->flags & TD_F_SCRAMBLE_BUFFERS) &&
1651 				   !(td->flags & TD_F_COMPRESS))
1652 				do_scramble = 1;
1653 			if (td->flags & TD_F_VER_NONE) {
1654 				populate_verify_io_u(td, io_u);
1655 				do_scramble = 0;
1656 			}
1657 		} else if (io_u->ddir == DDIR_READ) {
1658 			/*
1659 			 * Reset the buf_filled parameters so next time if the
1660 			 * buffer is used for writes it is refilled.
1661 			 */
1662 			io_u->buf_filled_len = 0;
1663 		}
1664 	}
1665 
1666 	/*
1667 	 * Set io data pointers.
1668 	 */
1669 	io_u->xfer_buf = io_u->buf;
1670 	io_u->xfer_buflen = io_u->buflen;
1671 
1672 out:
1673 	assert(io_u->file);
1674 	if (!td_io_prep(td, io_u)) {
1675 		if (!td->o.disable_lat)
1676 			fio_gettime(&io_u->start_time, NULL);
1677 
1678 		if (do_scramble)
1679 			small_content_scramble(io_u);
1680 
1681 		return io_u;
1682 	}
1683 err_put:
1684 	dprint(FD_IO, "get_io_u failed\n");
1685 	put_io_u(td, io_u);
1686 	return ERR_PTR(ret);
1687 }
1688 
1689 static void __io_u_log_error(struct thread_data *td, struct io_u *io_u)
1690 {
1691 	enum error_type_bit eb = td_error_type(io_u->ddir, io_u->error);
1692 
1693 	if (td_non_fatal_error(td, eb, io_u->error) && !td->o.error_dump)
1694 		return;
1695 
1696 	log_err("fio: io_u error%s%s: %s: %s offset=%llu, buflen=%lu\n",
1697 		io_u->file ? " on file " : "",
1698 		io_u->file ? io_u->file->file_name : "",
1699 		strerror(io_u->error),
1700 		io_ddir_name(io_u->ddir),
1701 		io_u->offset, io_u->xfer_buflen);
1702 
1703 	if (td->io_ops->errdetails) {
1704 		char *err = td->io_ops->errdetails(io_u);
1705 
1706 		log_err("fio: %s\n", err);
1707 		free(err);
1708 	}
1709 
1710 	if (!td->error)
1711 		td_verror(td, io_u->error, "io_u error");
1712 }
1713 
1714 void io_u_log_error(struct thread_data *td, struct io_u *io_u)
1715 {
1716 	__io_u_log_error(td, io_u);
1717 	if (td->parent)
1718 		__io_u_log_error(td->parent, io_u);
1719 }
1720 
1721 static inline bool gtod_reduce(struct thread_data *td)
1722 {
1723 	return (td->o.disable_clat && td->o.disable_slat && td->o.disable_bw)
1724 			|| td->o.gtod_reduce;
1725 }
1726 
1727 static void account_io_completion(struct thread_data *td, struct io_u *io_u,
1728 				  struct io_completion_data *icd,
1729 				  const enum fio_ddir idx, unsigned int bytes)
1730 {
1731 	const int no_reduce = !gtod_reduce(td);
1732 	unsigned long lusec = 0;
1733 
1734 	if (td->parent)
1735 		td = td->parent;
1736 
1737 	if (!td->o.stats)
1738 		return;
1739 
1740 	if (no_reduce)
1741 		lusec = utime_since(&io_u->issue_time, &icd->time);
1742 
1743 	if (!td->o.disable_lat) {
1744 		unsigned long tusec;
1745 
1746 		tusec = utime_since(&io_u->start_time, &icd->time);
1747 		add_lat_sample(td, idx, tusec, bytes, io_u->offset);
1748 
1749 		if (td->flags & TD_F_PROFILE_OPS) {
1750 			struct prof_io_ops *ops = &td->prof_io_ops;
1751 
1752 			if (ops->io_u_lat)
1753 				icd->error = ops->io_u_lat(td, tusec);
1754 		}
1755 
1756 		if (td->o.max_latency && tusec > td->o.max_latency)
1757 			lat_fatal(td, icd, tusec, td->o.max_latency);
1758 		if (td->o.latency_target && tusec > td->o.latency_target) {
1759 			if (lat_target_failed(td))
1760 				lat_fatal(td, icd, tusec, td->o.latency_target);
1761 		}
1762 	}
1763 
1764 	if (ddir_rw(idx)) {
1765 		if (!td->o.disable_clat) {
1766 			add_clat_sample(td, idx, lusec, bytes, io_u->offset);
1767 			io_u_mark_latency(td, lusec);
1768 		}
1769 
1770 		if (!td->o.disable_bw && per_unit_log(td->bw_log))
1771 			add_bw_sample(td, io_u, bytes, lusec);
1772 
1773 		if (no_reduce && per_unit_log(td->iops_log))
1774 			add_iops_sample(td, io_u, bytes);
1775 	}
1776 
1777 	if (td->ts.nr_block_infos && io_u->ddir == DDIR_TRIM) {
1778 		uint32_t *info = io_u_block_info(td, io_u);
1779 		if (BLOCK_INFO_STATE(*info) < BLOCK_STATE_TRIM_FAILURE) {
1780 			if (io_u->ddir == DDIR_TRIM) {
1781 				*info = BLOCK_INFO(BLOCK_STATE_TRIMMED,
1782 						BLOCK_INFO_TRIMS(*info) + 1);
1783 			} else if (io_u->ddir == DDIR_WRITE) {
1784 				*info = BLOCK_INFO_SET_STATE(BLOCK_STATE_WRITTEN,
1785 								*info);
1786 			}
1787 		}
1788 	}
1789 }
1790 
1791 static void file_log_write_comp(const struct thread_data *td, struct fio_file *f,
1792 				uint64_t offset, unsigned int bytes)
1793 {
1794 	int idx;
1795 
1796 	if (!f)
1797 		return;
1798 
1799 	if (f->first_write == -1ULL || offset < f->first_write)
1800 		f->first_write = offset;
1801 	if (f->last_write == -1ULL || ((offset + bytes) > f->last_write))
1802 		f->last_write = offset + bytes;
1803 
1804 	if (!f->last_write_comp)
1805 		return;
1806 
1807 	idx = f->last_write_idx++;
1808 	f->last_write_comp[idx] = offset;
1809 	if (f->last_write_idx == td->o.iodepth)
1810 		f->last_write_idx = 0;
1811 }
1812 
1813 static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
1814 			 struct io_completion_data *icd)
1815 {
1816 	struct io_u *io_u = *io_u_ptr;
1817 	enum fio_ddir ddir = io_u->ddir;
1818 	struct fio_file *f = io_u->file;
1819 
1820 	dprint_io_u(io_u, "io complete");
1821 
1822 	assert(io_u->flags & IO_U_F_FLIGHT);
1823 	io_u_clear(td, io_u, IO_U_F_FLIGHT | IO_U_F_BUSY_OK);
1824 
1825 	/*
1826 	 * Mark IO ok to verify
1827 	 */
1828 	if (io_u->ipo) {
1829 		/*
1830 		 * Remove errored entry from the verification list
1831 		 */
1832 		if (io_u->error)
1833 			unlog_io_piece(td, io_u);
1834 		else {
1835 			io_u->ipo->flags &= ~IP_F_IN_FLIGHT;
1836 			write_barrier();
1837 		}
1838 	}
1839 
1840 	if (ddir_sync(ddir)) {
1841 		td->last_was_sync = 1;
1842 		if (f) {
1843 			f->first_write = -1ULL;
1844 			f->last_write = -1ULL;
1845 		}
1846 		return;
1847 	}
1848 
1849 	td->last_was_sync = 0;
1850 	td->last_ddir = ddir;
1851 
1852 	if (!io_u->error && ddir_rw(ddir)) {
1853 		unsigned int bytes = io_u->buflen - io_u->resid;
1854 		int ret;
1855 
1856 		td->io_blocks[ddir]++;
1857 		td->this_io_blocks[ddir]++;
1858 		td->io_bytes[ddir] += bytes;
1859 
1860 		if (!(io_u->flags & IO_U_F_VER_LIST))
1861 			td->this_io_bytes[ddir] += bytes;
1862 
1863 		if (ddir == DDIR_WRITE)
1864 			file_log_write_comp(td, f, io_u->offset, bytes);
1865 
1866 		if (ramp_time_over(td) && (td->runstate == TD_RUNNING ||
1867 					   td->runstate == TD_VERIFYING))
1868 			account_io_completion(td, io_u, icd, ddir, bytes);
1869 
1870 		icd->bytes_done[ddir] += bytes;
1871 
1872 		if (io_u->end_io) {
1873 			ret = io_u->end_io(td, io_u_ptr);
1874 			io_u = *io_u_ptr;
1875 			if (ret && !icd->error)
1876 				icd->error = ret;
1877 		}
1878 	} else if (io_u->error) {
1879 		icd->error = io_u->error;
1880 		io_u_log_error(td, io_u);
1881 	}
1882 	if (icd->error) {
1883 		enum error_type_bit eb = td_error_type(ddir, icd->error);
1884 
1885 		if (!td_non_fatal_error(td, eb, icd->error))
1886 			return;
1887 
1888 		/*
1889 		 * If there is a non_fatal error, then add to the error count
1890 		 * and clear all the errors.
1891 		 */
1892 		update_error_count(td, icd->error);
1893 		td_clear_error(td);
1894 		icd->error = 0;
1895 		if (io_u)
1896 			io_u->error = 0;
1897 	}
1898 }
1899 
1900 static void init_icd(struct thread_data *td, struct io_completion_data *icd,
1901 		     int nr)
1902 {
1903 	int ddir;
1904 
1905 	if (!gtod_reduce(td))
1906 		fio_gettime(&icd->time, NULL);
1907 
1908 	icd->nr = nr;
1909 
1910 	icd->error = 0;
1911 	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
1912 		icd->bytes_done[ddir] = 0;
1913 }
1914 
1915 static void ios_completed(struct thread_data *td,
1916 			  struct io_completion_data *icd)
1917 {
1918 	struct io_u *io_u;
1919 	int i;
1920 
1921 	for (i = 0; i < icd->nr; i++) {
1922 		io_u = td->io_ops->event(td, i);
1923 
1924 		io_completed(td, &io_u, icd);
1925 
1926 		if (io_u)
1927 			put_io_u(td, io_u);
1928 	}
1929 }
1930 
1931 /*
1932  * Complete a single io_u for the sync engines.
1933  */
1934 int io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
1935 {
1936 	struct io_completion_data icd;
1937 	int ddir;
1938 
1939 	init_icd(td, &icd, 1);
1940 	io_completed(td, &io_u, &icd);
1941 
1942 	if (io_u)
1943 		put_io_u(td, io_u);
1944 
1945 	if (icd.error) {
1946 		td_verror(td, icd.error, "io_u_sync_complete");
1947 		return -1;
1948 	}
1949 
1950 	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
1951 		td->bytes_done[ddir] += icd.bytes_done[ddir];
1952 
1953 	return 0;
1954 }
1955 
1956 /*
1957  * Called to complete min_events number of io for the async engines.
1958  */
1959 int io_u_queued_complete(struct thread_data *td, int min_evts)
1960 {
1961 	struct io_completion_data icd;
1962 	struct timespec *tvp = NULL;
1963 	int ret, ddir;
1964 	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
1965 
1966 	dprint(FD_IO, "io_u_queued_complete: min=%d\n", min_evts);
1967 
1968 	if (!min_evts)
1969 		tvp = &ts;
1970 	else if (min_evts > td->cur_depth)
1971 		min_evts = td->cur_depth;
1972 
1973 	/* No worries, td_io_getevents fixes min and max if they are
1974 	 * set incorrectly */
1975 	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete_max, tvp);
1976 	if (ret < 0) {
1977 		td_verror(td, -ret, "td_io_getevents");
1978 		return ret;
1979 	} else if (!ret)
1980 		return ret;
1981 
1982 	init_icd(td, &icd, ret);
1983 	ios_completed(td, &icd);
1984 	if (icd.error) {
1985 		td_verror(td, icd.error, "io_u_queued_complete");
1986 		return -1;
1987 	}
1988 
1989 	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
1990 		td->bytes_done[ddir] += icd.bytes_done[ddir];
1991 
1992 	return ret;
1993 }
1994 
1995 /*
1996  * Call when io_u is really queued, to update the submission latency.
1997  */
1998 void io_u_queued(struct thread_data *td, struct io_u *io_u)
1999 {
2000 	if (!td->o.disable_slat && ramp_time_over(td) && td->o.stats) {
2001 		unsigned long slat_time;
2002 
2003 		slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
2004 
2005 		if (td->parent)
2006 			td = td->parent;
2007 
2008 		add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen,
2009 				io_u->offset);
2010 	}
2011 }
2012 
2013 /*
2014  * See if we should reuse the last seed, if dedupe is enabled
2015  */
2016 static struct frand_state *get_buf_state(struct thread_data *td)
2017 {
2018 	unsigned int v;
2019 
2020 	if (!td->o.dedupe_percentage)
2021 		return &td->buf_state;
2022 	else if (td->o.dedupe_percentage == 100) {
2023 		frand_copy(&td->buf_state_prev, &td->buf_state);
2024 		return &td->buf_state;
2025 	}
2026 
2027 	v = rand32_between(&td->dedupe_state, 1, 100);
2028 
2029 	if (v <= td->o.dedupe_percentage)
2030 		return &td->buf_state_prev;
2031 
2032 	return &td->buf_state;
2033 }
2034 
2035 static void save_buf_state(struct thread_data *td, struct frand_state *rs)
2036 {
2037 	if (td->o.dedupe_percentage == 100)
2038 		frand_copy(rs, &td->buf_state_prev);
2039 	else if (rs == &td->buf_state)
2040 		frand_copy(&td->buf_state_prev, rs);
2041 }
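/*
 * Dedupe handling in brief: with dedupe_percentage=N, roughly N% of
 * buffer fills reuse buf_state_prev and therefore reproduce the
 * previous buffer contents, while the remainder advance buf_state;
 * dedupe_percentage=100 keeps replaying the same state every time.
 */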
2042 
2043 void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
2044 		    unsigned int max_bs)
2045 {
2046 	struct thread_options *o = &td->o;
2047 
2048 	if (o->mem_type == MEM_CUDA_MALLOC)
2049 		return;
2050 
2051 	if (o->compress_percentage || o->dedupe_percentage) {
2052 		unsigned int perc = td->o.compress_percentage;
2053 		struct frand_state *rs;
2054 		unsigned int left = max_bs;
2055 		unsigned int this_write;
2056 
2057 		do {
2058 			rs = get_buf_state(td);
2059 
2060 			min_write = min(min_write, left);
2061 
2062 			if (perc) {
2063 				this_write = min_not_zero(min_write,
2064 							td->o.compress_chunk);
2065 
2066 				fill_random_buf_percentage(rs, buf, perc,
2067 					this_write, this_write,
2068 					o->buffer_pattern,
2069 					o->buffer_pattern_bytes);
2070 			} else {
2071 				fill_random_buf(rs, buf, min_write);
2072 				this_write = min_write;
2073 			}
2074 
2075 			buf += this_write;
2076 			left -= this_write;
2077 			save_buf_state(td, rs);
2078 		} while (left);
2079 	} else if (o->buffer_pattern_bytes)
2080 		fill_buffer_pattern(td, buf, max_bs);
2081 	else if (o->zero_buffers)
2082 		memset(buf, 0, max_bs);
2083 	else
2084 		fill_random_buf(get_buf_state(td), buf, max_bs);
2085 }
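/*
 * Example (assuming the documented option names): with
 * buffer_compress_percentage=50 and buffer_compress_chunk=4k, each 4k
 * chunk of a write buffer is filled so that about half of it is easily
 * compressible, approximating a roughly 2:1 compressible workload.
 */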
2086 
2087 /*
2088  * "randomly" fill the buffer contents
2089  */
2090 void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
2091 		      unsigned int min_write, unsigned int max_bs)
2092 {
2093 	io_u->buf_filled_len = 0;
2094 	fill_io_buffer(td, io_u->buf, min_write, max_bs);
2095 }
2096 
2097 static int do_sync_file_range(const struct thread_data *td,
2098 			      struct fio_file *f)
2099 {
2100 	off64_t offset, nbytes;
2101 
2102 	offset = f->first_write;
2103 	nbytes = f->last_write - f->first_write;
2104 
2105 	if (!nbytes)
2106 		return 0;
2107 
2108 	return sync_file_range(f->fd, offset, nbytes, td->o.sync_file_range);
2109 }
2110 
2111 int do_io_u_sync(const struct thread_data *td, struct io_u *io_u)
2112 {
2113 	int ret;
2114 
2115 	if (io_u->ddir == DDIR_SYNC) {
2116 		ret = fsync(io_u->file->fd);
2117 	} else if (io_u->ddir == DDIR_DATASYNC) {
2118 #ifdef CONFIG_FDATASYNC
2119 		ret = fdatasync(io_u->file->fd);
2120 #else
2121 		ret = io_u->xfer_buflen;
2122 		io_u->error = EINVAL;
2123 #endif
2124 	} else if (io_u->ddir == DDIR_SYNC_FILE_RANGE)
2125 		ret = do_sync_file_range(td, io_u->file);
2126 	else {
2127 		ret = io_u->xfer_buflen;
2128 		io_u->error = EINVAL;
2129 	}
2130 
2131 	if (ret < 0)
2132 		io_u->error = errno;
2133 
2134 	return ret;
2135 }
2136 
2137 int do_io_u_trim(const struct thread_data *td, struct io_u *io_u)
2138 {
2139 #ifndef FIO_HAVE_TRIM
2140 	io_u->error = EINVAL;
2141 	return 0;
2142 #else
2143 	struct fio_file *f = io_u->file;
2144 	int ret;
2145 
2146 	ret = os_trim(f->fd, io_u->offset, io_u->xfer_buflen);
2147 	if (!ret)
2148 		return io_u->xfer_buflen;
2149 
2150 	io_u->error = ret;
2151 	return 0;
2152 #endif
2153 }
2154