// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *
 * Based upon the circular ringbuffer.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-rb.c - pblk's write buffer
 */

#include <linux/circ_buf.h>

#include "pblk.h"

static DECLARE_RWSEM(pblk_rb_lock);

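/*
 * Free the backing pages of the write buffer. The global pblk_rb_lock
 * serializes this against other ring buffer (de)allocations.
 */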
static void pblk_rb_data_free(struct pblk_rb *rb)
{
	struct pblk_rb_pages *p, *t;

	down_write(&pblk_rb_lock);
	list_for_each_entry_safe(p, t, &rb->pages, list) {
		free_pages((unsigned long)page_address(p->pages), p->order);
		list_del(&p->list);
		kfree(p);
	}
	up_write(&pblk_rb_lock);
}

void pblk_rb_free(struct pblk_rb *rb)
{
	pblk_rb_data_free(rb);
	vfree(rb->entries);
}

/*
 * pblk_rb_calculate_size -- calculate the size of the write buffer
 */
static unsigned int pblk_rb_calculate_size(unsigned int nr_entries,
					   unsigned int threshold)
{
	unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA));
	unsigned int max_sz = max(thr_sz, nr_entries);
	unsigned int max_io;

	/* Alloc a write buffer that can (i) fit at least two split bios
	 * (considering the max I/O size, NVM_MAX_VLBA), and (ii) guarantee
	 * that the threshold will be respected.
	 */
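	/* Illustrative example (the constants here are assumptions): if
	 * NVM_MAX_VLBA were 64 and threshold were 128, then
	 * thr_sz = 1 << order(192) = 256 and max_io is at least 256; since
	 * threshold + NVM_MAX_VLBA = 192 stays below 256, no further doubling
	 * is needed and 256 entries are returned (assuming nr_entries <= 256).
	 */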
	max_io = (1 << max((int)(get_count_order(max_sz)),
				(int)(get_count_order(NVM_MAX_VLBA << 1))));
	if ((threshold + NVM_MAX_VLBA) >= max_io)
		max_io <<= 1;

	return max_io;
}

/*
 * Initialize ring buffer. The data and metadata buffers must be previously
 * allocated and their size must be a power of two
 * (Documentation/core-api/circular-buffers.rst)
 */
int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
		 unsigned int seg_size)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entries;
	unsigned int init_entry = 0;
	unsigned int max_order = MAX_ORDER - 1;
	unsigned int power_size, power_seg_sz;
	unsigned int alloc_order, order, iter;
	unsigned int nr_entries;

	nr_entries = pblk_rb_calculate_size(size, threshold);
	entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
	if (!entries)
		return -ENOMEM;

	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(seg_size);

	down_write(&pblk_rb_lock);
	rb->entries = entries;
	rb->seg_size = (1 << power_seg_sz);
	rb->nr_entries = (1 << power_size);
	rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
	rb->back_thres = threshold;
	rb->flush_point = EMPTY_ENTRY;

	spin_lock_init(&rb->w_lock);
	spin_lock_init(&rb->s_lock);

	INIT_LIST_HEAD(&rb->pages);

	alloc_order = power_size;
	if (alloc_order >= max_order) {
		order = max_order;
		iter = (1 << (alloc_order - max_order));
	} else {
		order = alloc_order;
		iter = 1;
	}

	do {
		struct pblk_rb_entry *entry;
		struct pblk_rb_pages *page_set;
		void *kaddr;
		unsigned long set_size;
		int i;

		page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
		if (!page_set) {
			up_write(&pblk_rb_lock);
			vfree(entries);
			return -ENOMEM;
		}

		page_set->order = order;
		page_set->pages = alloc_pages(GFP_KERNEL, order);
		if (!page_set->pages) {
			kfree(page_set);
			/* pblk_rb_data_free() takes pblk_rb_lock itself, so
			 * drop it first to avoid a self-deadlock.
			 */
			up_write(&pblk_rb_lock);
			pblk_rb_data_free(rb);
			vfree(entries);
			return -ENOMEM;
		}
		kaddr = page_address(page_set->pages);

		entry = &rb->entries[init_entry];
		entry->data = kaddr;
		entry->cacheline = pblk_cacheline_to_addr(init_entry++);
		entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;

		set_size = (1 << order);
		for (i = 1; i < set_size; i++) {
			entry = &rb->entries[init_entry];
			entry->cacheline = pblk_cacheline_to_addr(init_entry++);
			entry->data = kaddr + (i * rb->seg_size);
			entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
			bio_list_init(&entry->w_ctx.bios);
		}

		list_add_tail(&page_set->list, &rb->pages);
		iter--;
	} while (iter > 0);
	up_write(&pblk_rb_lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_set(&rb->inflight_flush_point, 0);
#endif

	/*
	 * Initialize rate-limiter, which controls access to the write buffer
	 * by user and GC I/O
	 */
	pblk_rl_init(&pblk->rl, rb->nr_entries, threshold);

	return 0;
}

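/*
 * Reset a write context once its entry has been persisted: mark the entry
 * writable again and clear its cached lba/ppa mapping.
 */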
static void clean_wctx(struct pblk_w_ctx *w_ctx)
{
	int flags;

	flags = READ_ONCE(w_ctx->flags);
	WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
			"pblk: overwriting unsubmitted data\n");

	/* Release flags on context. Protect from writes and reads */
	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
	pblk_ppa_set_empty(&w_ctx->ppa);
	w_ctx->lba = ADDR_EMPTY;
}

#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
#define pblk_rb_ring_space(rb, head, tail, size) \
					(CIRC_SPACE(head, tail, size))

/*
 * Buffer space is calculated with respect to the back pointer signaling
 * synchronized entries to the media.
 */
static unsigned int pblk_rb_space(struct pblk_rb *rb)
{
	unsigned int mem = READ_ONCE(rb->mem);
	unsigned int sync = READ_ONCE(rb->sync);

	return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
}

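/*
 * Advance a ring buffer pointer by nr_entries. The buffer size is a power of
 * two, so wrapping is a simple mask; e.g. with 1024 entries,
 * (1020 + 8) & 1023 == 4.
 */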
unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
			      unsigned int nr_entries)
{
	return (p + nr_entries) & (rb->nr_entries - 1);
}

/*
 * Buffer count is calculated with respect to the submission pointer, which
 * signals the entries that are available to be sent to the media.
 */
unsigned int pblk_rb_read_count(struct pblk_rb *rb)
{
	unsigned int mem = READ_ONCE(rb->mem);
	unsigned int subm = READ_ONCE(rb->subm);

	return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}

unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
{
	unsigned int mem = READ_ONCE(rb->mem);
	unsigned int sync = READ_ONCE(rb->sync);

	return pblk_rb_ring_count(mem, sync, rb->nr_entries);
}

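/*
 * Commit nr_entries for submission by advancing the submission pointer.
 * Returns the position of the first committed entry.
 */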
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
	unsigned int subm;

	subm = READ_ONCE(rb->subm);
	/* Commit read means updating submission pointer */
	smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));

	return subm;
}

static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_line *line;
	struct pblk_rb_entry *entry;
	struct pblk_w_ctx *w_ctx;
	unsigned int user_io = 0, gc_io = 0;
	unsigned int i;
	int flags;

	for (i = 0; i < to_update; i++) {
		entry = &rb->entries[rb->l2p_update];
		w_ctx = &entry->w_ctx;

		flags = READ_ONCE(entry->w_ctx.flags);
		if (flags & PBLK_IOTYPE_USER)
			user_io++;
		else if (flags & PBLK_IOTYPE_GC)
			gc_io++;
		else
			WARN(1, "pblk: unknown IO type\n");

		pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
							entry->cacheline);

		line = pblk_ppa_to_line(pblk, w_ctx->ppa);
		atomic_dec(&line->sec_to_update);
		kref_put(&line->ref, pblk_line_put);
		clean_wctx(w_ctx);
		rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
	}

	pblk_rl_out(&pblk->rl, user_io, gc_io);

	return 0;
}

/*
 * When we move the l2p_update pointer, we update the l2p table - lookups will
 * point to the physical address instead of to the cacheline in the write buffer
 * from this moment on.
 */
static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
			      unsigned int mem, unsigned int sync)
{
	unsigned int space, count;
	int ret = 0;

	lockdep_assert_held(&rb->w_lock);

	/* Update l2p only as buffer entries are being overwritten */
	space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
	if (space > nr_entries)
		goto out;

	count = nr_entries - space;
	/* l2p_update used exclusively under rb->w_lock */
	ret = __pblk_rb_update_l2p(rb, count);

out:
	return ret;
}

/*
 * Update the l2p entry for all sectors stored on the write buffer. This means
 * that all future lookups to the l2p table will point to a device address, not
 * to the cacheline in the write buffer.
 */
void pblk_rb_sync_l2p(struct pblk_rb *rb)
{
	unsigned int sync;
	unsigned int to_update;

	spin_lock(&rb->w_lock);

	/* Protect from reads and writes */
	sync = smp_load_acquire(&rb->sync);

	to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
	__pblk_rb_update_l2p(rb, to_update);

	spin_unlock(&rb->w_lock);
}

/*
 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
 * Typically, 4KB data chunks coming from a bio will be copied to the ring
 * buffer, thus the write will fail if not all incoming data can be copied.
 */
static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
				  struct pblk_w_ctx w_ctx,
				  struct pblk_rb_entry *entry)
{
	memcpy(entry->data, data, rb->seg_size);

	entry->w_ctx.lba = w_ctx.lba;
	entry->w_ctx.ppa = w_ctx.ppa;
}

void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
			      struct pblk_w_ctx w_ctx, unsigned int ring_pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entry;
	int flags;

	entry = &rb->entries[ring_pos];
	flags = READ_ONCE(entry->w_ctx.flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Caller must guarantee that the entry is free */
	BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

	__pblk_rb_write_entry(rb, data, w_ctx, entry);

	pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
	flags = w_ctx.flags | PBLK_WRITTEN_DATA;

	/* Release flags on write context. Protect from writes */
	smp_store_release(&entry->w_ctx.flags, flags);
}

void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
			    struct pblk_w_ctx w_ctx, struct pblk_line *line,
			    u64 paddr, unsigned int ring_pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entry;
	int flags;

	entry = &rb->entries[ring_pos];
	flags = READ_ONCE(entry->w_ctx.flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Caller must guarantee that the entry is free */
	BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

	__pblk_rb_write_entry(rb, data, w_ctx, entry);

	if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
		entry->w_ctx.lba = ADDR_EMPTY;

	flags = w_ctx.flags | PBLK_WRITTEN_DATA;

	/* Release flags on write context. Protect from writes */
	smp_store_release(&entry->w_ctx.flags, flags);
}

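/*
 * Set the flush point to the last written entry before @pos and, if @bio is
 * given, queue it on that entry so it can be completed once the entry is
 * synced. Returns 1 if a bio was queued, 0 otherwise (including when the
 * buffer is already synced up to @pos).
 */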
static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
				   unsigned int pos)
{
	struct pblk_rb_entry *entry;
	unsigned int sync, flush_point;

	pblk_rb_sync_init(rb, NULL);
	sync = READ_ONCE(rb->sync);

	if (pos == sync) {
		pblk_rb_sync_end(rb, NULL);
		return 0;
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_inc(&rb->inflight_flush_point);
#endif

	flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
	entry = &rb->entries[flush_point];

	/* Protect flush points */
	smp_store_release(&rb->flush_point, flush_point);

	if (bio)
		bio_list_add(&entry->w_ctx.bios, bio);

	pblk_rb_sync_end(rb, NULL);

	return bio ? 1 : 0;
}

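/*
 * Check that nr_entries fit in the buffer while keeping back_thres entries
 * in reserve, and update the l2p table for entries about to be overwritten.
 * On success, *pos is set to the current write (mem) position.
 */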
static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
			       unsigned int *pos)
{
	unsigned int mem;
	unsigned int sync;
	unsigned int threshold;

	sync = READ_ONCE(rb->sync);
	mem = READ_ONCE(rb->mem);

	threshold = nr_entries + rb->back_thres;

	if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
		return 0;

	if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
		return 0;

	*pos = mem;

	return 1;
}

static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
			     unsigned int *pos)
{
	if (!__pblk_rb_may_write(rb, nr_entries, pos))
		return 0;

	/* Protect from read count */
	smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
	return 1;
}

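/*
 * Request that everything buffered so far gets persisted: set a flush point
 * at the current write position and kick the write thread.
 */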
void pblk_rb_flush(struct pblk_rb *rb)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	unsigned int mem = READ_ONCE(rb->mem);

	if (pblk_rb_flush_point_set(rb, NULL, mem))
		return;

	pblk_write_kick(pblk);
}

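/*
 * Like pblk_rb_may_write(), but also honours REQ_PREFLUSH on the incoming
 * bio by setting a flush point. *io_ret tells the caller whether the bio can
 * be completed right away (NVM_IO_DONE) or is held on the flush point until
 * it is synced (NVM_IO_OK).
 */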
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
				   unsigned int *pos, struct bio *bio,
				   int *io_ret)
{
	unsigned int mem;

	if (!__pblk_rb_may_write(rb, nr_entries, pos))
		return 0;

	mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
	*io_ret = NVM_IO_DONE;

	if (bio->bi_opf & REQ_PREFLUSH) {
		struct pblk *pblk = container_of(rb, struct pblk, rwb);

		atomic64_inc(&pblk->nr_flush);
		if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
			*io_ret = NVM_IO_OK;
	}

	/* Protect from read count */
	smp_store_release(&rb->mem, mem);

	return 1;
}

/*
 * Atomically check that (i) there is space on the write buffer for the
 * incoming I/O, and (ii) the current I/O type has enough budget in the write
 * buffer (rate-limiter).
 */
int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
			   unsigned int nr_entries, unsigned int *pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	int io_ret;

	spin_lock(&rb->w_lock);
	io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
	if (io_ret) {
		spin_unlock(&rb->w_lock);
		return io_ret;
	}

	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
		spin_unlock(&rb->w_lock);
		return NVM_IO_REQUEUE;
	}

	pblk_rl_user_in(&pblk->rl, nr_entries);
	spin_unlock(&rb->w_lock);

	return io_ret;
}

/*
 * See the comment on pblk_rb_may_write_user().
 */
int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
			 unsigned int *pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);

	spin_lock(&rb->w_lock);
	if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
		spin_unlock(&rb->w_lock);
		return 0;
	}

	if (!pblk_rb_may_write(rb, nr_entries, pos)) {
		spin_unlock(&rb->w_lock);
		return 0;
	}

	pblk_rl_gc_in(&pblk->rl, nr_entries);
	spin_unlock(&rb->w_lock);

	return 1;
}

/*
 * Read available entries on rb and add them to the given bio. To avoid a
 * memory copy, a page reference to the write buffer is added to the bio
 * instead.
 *
 * This function is used by the write thread to form the write bio that will
 * persist data on the write buffer to the media.
 */
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
				 unsigned int pos, unsigned int nr_entries,
				 unsigned int count)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct request_queue *q = pblk->dev->q;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	struct bio *bio = rqd->bio;
	struct pblk_rb_entry *entry;
	struct page *page;
	unsigned int pad = 0, to_read = nr_entries;
	unsigned int i;
	int flags;

	if (count < nr_entries) {
		pad = nr_entries - count;
		to_read = count;
	}

	/* Add space for packed metadata if in use */
	pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);

	c_ctx->sentry = pos;
	c_ctx->nr_valid = to_read;
	c_ctx->nr_padded = pad;

	for (i = 0; i < to_read; i++) {
		entry = &rb->entries[pos];

		/* A write has been allowed into the buffer, but data is still
		 * being copied to it. It is ok to busy wait.
		 */
try:
		flags = READ_ONCE(entry->w_ctx.flags);
		if (!(flags & PBLK_WRITTEN_DATA)) {
			io_schedule();
			goto try;
		}

		page = virt_to_page(entry->data);
		if (!page) {
			pblk_err(pblk, "could not allocate write bio page\n");
			flags &= ~PBLK_WRITTEN_DATA;
			flags |= PBLK_SUBMITTED_ENTRY;
			/* Release flags on context. Protect from writes */
			smp_store_release(&entry->w_ctx.flags, flags);
			return NVM_IO_ERR;
		}

		if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
								rb->seg_size) {
			pblk_err(pblk, "could not add page to write bio\n");
			flags &= ~PBLK_WRITTEN_DATA;
			flags |= PBLK_SUBMITTED_ENTRY;
			/* Release flags on context. Protect from writes */
			smp_store_release(&entry->w_ctx.flags, flags);
			return NVM_IO_ERR;
		}

		flags &= ~PBLK_WRITTEN_DATA;
		flags |= PBLK_SUBMITTED_ENTRY;

		/* Release flags on context. Protect from writes */
		smp_store_release(&entry->w_ctx.flags, flags);

		pos = pblk_rb_ptr_wrap(rb, pos, 1);
	}

	if (pad) {
		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
			pblk_err(pblk, "could not pad page in write bio\n");
			return NVM_IO_ERR;
		}

		if (pad < pblk->min_write_pgs)
			atomic64_inc(&pblk->pad_dist[pad - 1]);
		else
			pblk_warn(pblk, "padding more than min. sectors\n");

		atomic64_add(pad, &pblk->pad_wa);
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(pad, &pblk->padded_writes);
#endif

	return NVM_IO_OK;
}

/*
 * Copy to bio only if the lba matches the one on the given cache entry.
 * Otherwise, it means that the entry has been overwritten, and the bio should
 * be directed to disk.
 */
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
			struct ppa_addr ppa)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entry;
	struct pblk_w_ctx *w_ctx;
	struct ppa_addr l2p_ppa;
	u64 pos = pblk_addr_to_cacheline(ppa);
	void *data;
	int flags;
	int ret = 1;

#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Caller must ensure that the access will not cause an overflow */
	BUG_ON(pos >= rb->nr_entries);
#endif
	entry = &rb->entries[pos];
	w_ctx = &entry->w_ctx;
	flags = READ_ONCE(w_ctx->flags);

	spin_lock(&rb->w_lock);
	spin_lock(&pblk->trans_lock);
	l2p_ppa = pblk_trans_map_get(pblk, lba);
	spin_unlock(&pblk->trans_lock);

	/* Check if the entry has been overwritten or is scheduled to be */
	if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
						flags & PBLK_WRITABLE_ENTRY) {
		ret = 0;
		goto out;
	}
	data = bio_data(bio);
	memcpy(data, entry->data, rb->seg_size);

out:
	spin_unlock(&rb->w_lock);
	return ret;
}

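/* Return the write context of the entry at @pos (wrapped to the buffer size) */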
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
{
	unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);

	return &rb->entries[entry].w_ctx;
}

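/*
 * pblk_rb_sync_init() and pblk_rb_sync_end() bracket updates of the sync
 * pointer: they take s_lock (saving IRQ flags when @flags is provided), and
 * pblk_rb_sync_init() returns the current sync position.
 */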
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
	__acquires(&rb->s_lock)
{
	if (flags)
		spin_lock_irqsave(&rb->s_lock, *flags);
	else
		spin_lock_irq(&rb->s_lock);

	return rb->sync;
}

void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
	__releases(&rb->s_lock)
{
	lockdep_assert_held(&rb->s_lock);

	if (flags)
		spin_unlock_irqrestore(&rb->s_lock, *flags);
	else
		spin_unlock_irq(&rb->s_lock);
}

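/*
 * Advance the sync pointer by nr_entries once the corresponding entries have
 * been persisted, clearing the flush point if this advance covers it. Must be
 * called under s_lock (see pblk_rb_sync_init()).
 */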
unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
{
	unsigned int sync, flush_point;

	lockdep_assert_held(&rb->s_lock);

	sync = READ_ONCE(rb->sync);
	flush_point = READ_ONCE(rb->flush_point);

	if (flush_point != EMPTY_ENTRY) {
		unsigned int secs_to_flush;

		secs_to_flush = pblk_rb_ring_count(flush_point, sync,
					rb->nr_entries);
		if (secs_to_flush < nr_entries) {
			/* Protect flush points */
			smp_store_release(&rb->flush_point, EMPTY_ENTRY);
		}
	}

	sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);

	/* Protect from counts */
	smp_store_release(&rb->sync, sync);

	return sync;
}

/* Calculate how many sectors to submit up to the current flush point. */
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
{
	unsigned int subm, sync, flush_point;
	unsigned int submitted, to_flush;

	/* Protect flush points */
	flush_point = smp_load_acquire(&rb->flush_point);
	if (flush_point == EMPTY_ENTRY)
		return 0;

	/* Protect syncs */
	sync = smp_load_acquire(&rb->sync);

	subm = READ_ONCE(rb->subm);
	submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);

	/* The sync point itself counts as a sector to sync */
	to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;

	return (submitted < to_flush) ? (to_flush - submitted) : 0;
}

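/*
 * Sanity check on tear down: returns 1 if the buffer is not fully synced and
 * its entry array or data pages are missing, 0 otherwise.
 */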
int pblk_rb_tear_down_check(struct pblk_rb *rb)
{
	struct pblk_rb_entry *entry;
	int i;
	int ret = 0;

	spin_lock(&rb->w_lock);
	spin_lock_irq(&rb->s_lock);

	if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
				(rb->sync == rb->l2p_update) &&
				(rb->flush_point == EMPTY_ENTRY)) {
		goto out;
	}

	if (!rb->entries) {
		ret = 1;
		goto out;
	}

	for (i = 0; i < rb->nr_entries; i++) {
		entry = &rb->entries[i];

		if (!entry->data) {
			ret = 1;
			goto out;
		}
	}

out:
	spin_unlock_irq(&rb->s_lock);
	spin_unlock(&rb->w_lock);

	return ret;
}

unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
{
	return (pos & (rb->nr_entries - 1));
}

int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
{
	return (pos >= rb->nr_entries);
}

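/*
 * Dump the ring buffer state (pointers, flush point and queued completions)
 * into @buf for sysfs.
 */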
ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_c_ctx *c;
	ssize_t offset;
	int queued_entries = 0;

	spin_lock_irq(&rb->s_lock);
	list_for_each_entry(c, &pblk->compl_list, list)
		queued_entries++;
	spin_unlock_irq(&rb->s_lock);

	if (rb->flush_point != EMPTY_ENTRY)
		offset = scnprintf(buf, PAGE_SIZE,
			"%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
			rb->nr_entries,
			rb->mem,
			rb->subm,
			rb->sync,
			rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
			atomic_read(&rb->inflight_flush_point),
#else
			0,
#endif
			rb->flush_point,
			pblk_rb_read_count(rb),
			pblk_rb_space(rb),
			pblk_rb_flush_point_count(rb),
			queued_entries);
	else
		offset = scnprintf(buf, PAGE_SIZE,
			"%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
			rb->nr_entries,
			rb->mem,
			rb->subm,
			rb->sync,
			rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
			atomic_read(&rb->inflight_flush_point),
#else
			0,
#endif
			pblk_rb_read_count(rb),
			pblk_rb_space(rb),
			pblk_rb_flush_point_count(rb),
			queued_entries);

	return offset;
}