/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */

#include "pblk.h"

static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
				*pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
struct bio_set *pblk_bio_set;

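/* Route a user bio: reads are submitted to the media (or served from the
 * write buffer) right away, while writes are placed in the write buffer and
 * persisted later by the write thread.
 */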
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
			  struct bio *bio)
{
	int ret;

	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
	 * constraint. Writes can be of arbitrary size.
	 */
	if (bio_data_dir(bio) == READ) {
		blk_queue_split(q, &bio);
		ret = pblk_submit_read(pblk, bio);
		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
			bio_put(bio);

		return ret;
	}

	/* Prevent deadlock in the case of a modest LUN configuration and large
	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
	 * available for user I/O.
	 */
	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
		blk_queue_split(q, &bio);

	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}

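/* ->make_rq entry point for the pblk block device. Discards are handled
 * first; everything else goes through pblk_rw_io(), and the bio is
 * completed here on NVM_IO_DONE or failed on NVM_IO_ERR.
 */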
static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
	struct pblk *pblk = q->queuedata;

	if (bio_op(bio) == REQ_OP_DISCARD) {
		pblk_discard(pblk, bio);
		if (!(bio->bi_opf & REQ_PREFLUSH)) {
			bio_endio(bio);
			return BLK_QC_T_NONE;
		}
	}

	switch (pblk_rw_io(q, pblk, bio)) {
	case NVM_IO_ERR:
		bio_io_error(bio);
		break;
	case NVM_IO_DONE:
		bio_endio(bio);
		break;
	}

	return BLK_QC_T_NONE;
}

static void pblk_l2p_free(struct pblk *pblk)
{
	vfree(pblk->trans_map);
}

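/* The logical-to-physical table holds one physical address per exposed
 * sector. Entries are 4 bytes when the device address format is narrower
 * than 32 bits and 8 bytes otherwise; all entries start out empty
 * (unmapped).
 */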
static int pblk_l2p_init(struct pblk *pblk)
{
	sector_t i;
	struct ppa_addr ppa;
	int entry_size = 8;

	if (pblk->ppaf_bitsize < 32)
		entry_size = 4;

	pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
	if (!pblk->trans_map)
		return -ENOMEM;

	pblk_ppa_set_empty(&ppa);

	for (i = 0; i < pblk->rl.nr_secs; i++)
		pblk_trans_map_set(pblk, i, ppa);

	return 0;
}

static void pblk_rwb_free(struct pblk *pblk)
{
	if (pblk_rb_tear_down_check(&pblk->rwb))
		pr_err("pblk: write buffer error on tear down\n");

	pblk_rb_data_free(&pblk->rwb);
	vfree(pblk_rb_entries_ref(&pblk->rwb));
}

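/* The write buffer (rwb) is a power-of-two sized ring buffer backed by
 * vzalloc'ed entries; its size is derived from the number of pages that
 * must be kept in flight (pblk->pgs_in_buffer).
 */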
static int pblk_rwb_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_rb_entry *entries;
	unsigned long nr_entries;
	unsigned int power_size, power_seg_sz;

	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);

	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
	if (!entries)
		return -ENOMEM;

	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(geo->sec_size);

	return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
}

/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64

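/* Pre-compute bit offsets and masks for composing physical addresses in the
 * device's address format. The fields are packed, from least to most
 * significant: sector, plane, channel, LUN, page, block.
 */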
static int pblk_set_ppaf(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_addr_format ppaf = geo->ppaf;
	int power_len;

	/* Re-calculate channel and lun format to adapt to configuration */
	power_len = get_count_order(geo->nr_chnls);
	if (1 << power_len != geo->nr_chnls) {
		pr_err("pblk: supports only power-of-two channel config.\n");
		return -EINVAL;
	}
	ppaf.ch_len = power_len;

	power_len = get_count_order(geo->luns_per_chnl);
	if (1 << power_len != geo->luns_per_chnl) {
		pr_err("pblk: supports only power-of-two LUN config.\n");
		return -EINVAL;
	}
	ppaf.lun_len = power_len;

	pblk->ppaf.sec_offset = 0;
	pblk->ppaf.pln_offset = ppaf.sect_len;
	pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
	pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
	pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
	pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
	pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
	pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
							pblk->ppaf.pln_offset;
	pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
							pblk->ppaf.ch_offset;
	pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
							pblk->ppaf.lun_offset;
	pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
							pblk->ppaf.pg_offset;
	pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
							pblk->ppaf.blk_offset;

	pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;

	return 0;
}

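/* Slab caches are global and shared by all pblk instances; their creation
 * is serialized with pblk_lock. Only the line metadata cache gets a
 * per-instance name, since its object size depends on the instance's line
 * geometry.
 */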
static int pblk_init_global_caches(struct pblk *pblk)
{
	char cache_name[PBLK_CACHE_NAME_LEN];

	down_write(&pblk_lock);
	pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
				sizeof(struct pblk_line_ws), 0, 0, NULL);
	if (!pblk_blk_ws_cache) {
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_rec_cache = kmem_cache_create("pblk_rec",
				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
	if (!pblk_rec_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
				0, 0, NULL);
	if (!pblk_g_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
				0, 0, NULL);
	if (!pblk_w_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
							pblk->disk->disk_name);
	pblk_line_meta_cache = kmem_cache_create(cache_name,
				pblk->lm.sec_bitmap_len, 0, 0, NULL);
	if (!pblk_line_meta_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		kmem_cache_destroy(pblk_w_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}
	up_write(&pblk_lock);

	return 0;
}

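/* Allocate the resources that back the fast path: the global slab caches,
 * the mempools for page bios, line work items, recovery and read/write
 * request contexts, the line-close and bad-block workqueues, the address
 * format and the write buffer.
 */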
static int pblk_core_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
						geo->nr_planes * geo->nr_luns;

	if (pblk_init_global_caches(pblk))
		return -ENOMEM;

	/* internal bios can be at most the sectors signaled by the device. */
	pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev),
									0);
	if (!pblk->page_bio_pool)
		return -ENOMEM;

	pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
							pblk_blk_ws_cache);
	if (!pblk->line_ws_pool)
		goto free_page_bio_pool;

	pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
	if (!pblk->rec_pool)
		goto free_blk_ws_pool;

	pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
							pblk_g_rq_cache);
	if (!pblk->g_rq_pool)
		goto free_rec_pool;

	pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
							pblk_w_rq_cache);
	if (!pblk->w_rq_pool)
		goto free_g_rq_pool;

	pblk->line_meta_pool =
			mempool_create_slab_pool(PBLK_META_POOL_SIZE,
							pblk_line_meta_cache);
	if (!pblk->line_meta_pool)
		goto free_w_rq_pool;

	pblk->close_wq = alloc_workqueue("pblk-close-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
	if (!pblk->close_wq)
		goto free_line_meta_pool;

	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!pblk->bb_wq)
		goto free_close_wq;

	if (pblk_set_ppaf(pblk))
		goto free_bb_wq;

	if (pblk_rwb_init(pblk))
		goto free_bb_wq;

	INIT_LIST_HEAD(&pblk->compl_list);
	return 0;

free_bb_wq:
	destroy_workqueue(pblk->bb_wq);
free_close_wq:
	destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
	mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
	mempool_destroy(pblk->w_rq_pool);
free_g_rq_pool:
	mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
	mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
	mempool_destroy(pblk->line_ws_pool);
free_page_bio_pool:
	mempool_destroy(pblk->page_bio_pool);
	return -ENOMEM;
}

static void pblk_core_free(struct pblk *pblk)
{
	if (pblk->close_wq)
		destroy_workqueue(pblk->close_wq);

	if (pblk->bb_wq)
		destroy_workqueue(pblk->bb_wq);

	mempool_destroy(pblk->page_bio_pool);
	mempool_destroy(pblk->line_ws_pool);
	mempool_destroy(pblk->rec_pool);
	mempool_destroy(pblk->g_rq_pool);
	mempool_destroy(pblk->w_rq_pool);
	mempool_destroy(pblk->line_meta_pool);

	kmem_cache_destroy(pblk_blk_ws_cache);
	kmem_cache_destroy(pblk_rec_cache);
	kmem_cache_destroy(pblk_g_rq_cache);
	kmem_cache_destroy(pblk_w_rq_cache);
	kmem_cache_destroy(pblk_line_meta_cache);
}

static void pblk_luns_free(struct pblk *pblk)
{
	kfree(pblk->luns);
}

static void pblk_free_line_bitmaps(struct pblk_line *line)
{
	kfree(line->blk_bitmap);
	kfree(line->erase_bitmap);
}

static void pblk_lines_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int i;

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		pblk_line_free(pblk, line);
		pblk_free_line_bitmaps(line);
	}
	spin_unlock(&l_mg->free_lock);
}

static void pblk_line_meta_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int i;

	kfree(l_mg->bb_template);
	kfree(l_mg->bb_aux);
	kfree(l_mg->vsc_list);

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		kfree(l_mg->sline_meta[i]);
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	}
	spin_unlock(&l_mg->free_lock);

	kfree(pblk->lines);
}

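/* Read the bad-block table for one LUN from the device and fold the
 * per-plane entries into a single per-block list kept in rlun->bb_list.
 */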
static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
{
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr ppa;
	u8 *blks;
	int nr_blks, ret;

	nr_blks = geo->blks_per_lun * geo->plane_mode;
	blks = kmalloc(nr_blks, GFP_KERNEL);
	if (!blks)
		return -ENOMEM;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;

	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	if (ret)
		goto out;

	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	if (nr_blks < 0) {
		ret = nr_blks;
		goto out;
	}

	rlun->bb_list = blks;

	return 0;
out:
	kfree(blks);
	return ret;
}

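/* Mark the blocks of a line that sit on bad blocks in line->blk_bitmap and
 * return how many were found.
 */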
static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
			int blk_per_line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int bb_cnt = 0;
	int i;

	for (i = 0; i < blk_per_line; i++) {
		rlun = &pblk->luns[i];
		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
			continue;

		set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
		bb_cnt++;
	}

	return bb_cnt;
}

static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;

	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->blk_bitmap)
		return -ENOMEM;

	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap) {
		kfree(line->blk_bitmap);
		return -ENOMEM;
	}

	return 0;
}

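/* Build pblk's LUN array from the target's LUN list, striping consecutive
 * entries across channels so that sequential allocation alternates
 * channels. E.g. with 2 channels x 2 LUNs per channel, pblk->luns[0..3]
 * take luns[0], luns[2], luns[1], luns[3]. Bad-block discovery is also run
 * here, once per LUN.
 */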
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int i, ret;

	/* TODO: Implement unbalanced LUN support */
	if (geo->luns_per_chnl < 0) {
		pr_err("pblk: unbalanced LUN config.\n");
		return -EINVAL;
	}

	pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
	if (!pblk->luns)
		return -ENOMEM;

	for (i = 0; i < geo->nr_luns; i++) {
		/* Stripe across channels */
		int ch = i % geo->nr_chnls;
		int lun_raw = i / geo->nr_chnls;
		int lunid = lun_raw + ch * geo->luns_per_chnl;

		rlun = &pblk->luns[i];
		rlun->bppa = luns[lunid];

		sema_init(&rlun->wr_sem, 1);

		ret = pblk_bb_discovery(dev, rlun);
		if (ret) {
			while (--i >= 0)
				kfree(pblk->luns[i].bb_list);
			return ret;
		}
	}

	return 0;
}

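/* Unless the target is created with NVM_TARGET_FACTORY, try to recover the
 * L2P table from the existing line metadata; when there is nothing to
 * recover (or on factory init) the first free line is prepared for user
 * data.
 */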
static int pblk_lines_configure(struct pblk *pblk, int flags)
{
	struct pblk_line *line = NULL;
	int ret = 0;

	if (!(flags & NVM_TARGET_FACTORY)) {
		line = pblk_recov_l2p(pblk);
		if (IS_ERR(line)) {
			pr_err("pblk: could not recover l2p table\n");
			ret = -EFAULT;
		}
	}

	if (!line) {
		/* Configure next line for user data */
		line = pblk_line_get_first_data(pblk);
		if (!line) {
			pr_err("pblk: line list corrupted\n");
			ret = -EFAULT;
		}
	}

	return ret;
}

/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	/* Round to sector size so that lba_list starts on its own sector */
	lm->emeta_sec[1] = DIV_ROUND_UP(
			sizeof(struct line_emeta) + lm->blk_bitmap_len,
			geo->sec_size);
	lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;

	/* Round to sector size so that vsc_list starts on its own sector */
	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
			geo->sec_size);
	lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;

	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
			geo->sec_size);
	lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;

	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);

	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}

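/* Reserve a fixed 20% of the free blocks as over-provisioning: the exposed
 * capacity is nr_free_blks * (100 - over_pct) / 100 blocks worth of
 * sectors, while the rate limiter still accounts for every free block.
 */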
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;

	pblk->over_pct = 20;

	provisioned = nr_free_blks;
	provisioned *= (100 - pblk->over_pct);
	sector_div(provisioned, 100);

	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
	pblk->capacity = provisioned * geo->sec_per_blk;
	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}

static int pblk_lines_alloc_metadata(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	int i;

	/* smeta is always small enough to fit on a kmalloc memory allocation,
	 * emeta depends on the number of LUNs allocated to the pblk instance
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
		if (!l_mg->sline_meta[i])
			goto fail_free_smeta;
	}

	/* emeta allocates three different buffers for managing metadata with
	 * in-memory and in-media layouts
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		struct pblk_emeta *emeta;

		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
		if (!emeta)
			goto fail_free_emeta;

		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

			emeta->buf = vmalloc(lm->emeta_len[0]);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		} else {
			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		}
	}

	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
	if (!l_mg->vsc_list)
		goto fail_free_emeta;

	for (i = 0; i < l_mg->nr_lines; i++)
		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);

	return 0;

fail_free_emeta:
	while (--i >= 0) {
		/* emeta->buf may be kmalloc'ed; free with the matching helper */
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	}

fail_free_smeta:
	for (i = 0; i < PBLK_DATA_LINES; i++)
		kfree(l_mg->sline_meta[i]);

	return -ENOMEM;
}

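/* Set up the line geometry: a line stripes one block from every LUN. The
 * smeta and emeta regions are grown in steps of geo->sec_per_pl sectors
 * until their payloads fit, and every line gets its bad-block bitmap filled
 * in before being placed on the free list (or the bad list if too many of
 * its blocks are bad).
 */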
static int pblk_lines_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	unsigned int smeta_len, emeta_len;
	long nr_bad_blks, nr_free_blks;
	int bb_distance, max_write_ppas, mod;
	int i, ret;

	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
				max_write_ppas : nvm_max_phys_sects(dev);
	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);

	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
		pr_err("pblk: cannot support device max_phys_sect\n");
		return -EINVAL;
	}

	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
	if (mod) {
		pr_err("pblk: bad configuration of sectors/pages\n");
		return -EINVAL;
	}

	l_mg->nr_lines = geo->blks_per_lun;
	l_mg->log_line = l_mg->data_line = NULL;
	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
	l_mg->nr_free_lines = 0;
	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
	lm->blk_per_line = geo->nr_luns;
	lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->mid_thrs = lm->sec_per_line / 2;
	lm->high_thrs = lm->sec_per_line / 4;
	lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;

	/* Calculate necessary pages for smeta. See comment over struct
	 * line_smeta definition
	 */
	i = 1;
add_smeta_page:
	lm->smeta_sec = i * geo->sec_per_pl;
	lm->smeta_len = lm->smeta_sec * geo->sec_size;

	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
	if (smeta_len > lm->smeta_len) {
		i++;
		goto add_smeta_page;
	}

	/* Calculate necessary pages for emeta. See comment over struct
	 * line_emeta definition
	 */
	i = 1;
add_emeta_page:
	lm->emeta_sec[0] = i * geo->sec_per_pl;
	lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;

	emeta_len = calc_emeta_len(pblk);
	if (emeta_len > lm->emeta_len[0]) {
		i++;
		goto add_emeta_page;
	}

	lm->emeta_bb = geo->nr_luns - i;
	lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
							geo->sec_per_blk);
	if (lm->min_blk_line > lm->blk_per_line) {
		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
							lm->blk_per_line);
		ret = -EINVAL;
		goto fail;
	}

	ret = pblk_lines_alloc_metadata(pblk);
	if (ret)
		goto fail;

	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_template) {
		ret = -ENOMEM;
		goto fail_free_meta;
	}

	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_aux) {
		ret = -ENOMEM;
		goto fail_free_bb_template;
	}

	bb_distance = (geo->nr_luns) * geo->sec_per_pl;
	for (i = 0; i < lm->sec_per_line; i += bb_distance)
		bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);

	INIT_LIST_HEAD(&l_mg->free_list);
	INIT_LIST_HEAD(&l_mg->corrupt_list);
	INIT_LIST_HEAD(&l_mg->bad_list);
	INIT_LIST_HEAD(&l_mg->gc_full_list);
	INIT_LIST_HEAD(&l_mg->gc_high_list);
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);

	INIT_LIST_HEAD(&l_mg->emeta_list);

	l_mg->gc_lists[0] = &l_mg->gc_high_list;
	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
	l_mg->gc_lists[2] = &l_mg->gc_low_list;

	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->close_lock);
	spin_lock_init(&l_mg->gc_lock);

	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
								GFP_KERNEL);
	if (!pblk->lines) {
		ret = -ENOMEM;
		goto fail_free_bb_aux;
	}

	nr_free_blks = 0;
	for (i = 0; i < l_mg->nr_lines; i++) {
		int blk_in_line;

		line = &pblk->lines[i];

		line->pblk = pblk;
		line->id = i;
		line->type = PBLK_LINETYPE_FREE;
		line->state = PBLK_LINESTATE_FREE;
		line->gc_group = PBLK_LINEGC_NONE;
		line->vsc = &l_mg->vsc_list[i];
		spin_lock_init(&line->lock);

		ret = pblk_alloc_line_bitmaps(pblk, line);
		if (ret)
			goto fail_free_lines;

		nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
			pblk_free_line_bitmaps(line);
			ret = -EINVAL;
			goto fail_free_lines;
		}

		blk_in_line = lm->blk_per_line - nr_bad_blks;
		if (blk_in_line < lm->min_blk_line) {
			line->state = PBLK_LINESTATE_BAD;
			list_add_tail(&line->list, &l_mg->bad_list);
			continue;
		}

		nr_free_blks += blk_in_line;
		atomic_set(&line->blk_in_line, blk_in_line);

		l_mg->nr_free_lines++;
		list_add_tail(&line->list, &l_mg->free_list);
	}

	pblk_set_provision(pblk, nr_free_blks);

	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);

	return 0;
fail_free_lines:
	while (--i >= 0)
		pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
	kfree(l_mg->bb_aux);
fail_free_bb_template:
	kfree(l_mg->bb_template);
fail_free_meta:
	pblk_line_meta_free(pblk);
fail:
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);

	return ret;
}

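/* Set up the write path: the kthread created here drains the write buffer
 * to the media (it is woken at the end of pblk_init), and the wtimer is
 * armed to fire 100ms later via pblk_write_timer_fn.
 */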
static int pblk_writer_init(struct pblk *pblk)
{
	setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
	if (IS_ERR(pblk->writer_ts)) {
		pr_err("pblk: could not allocate writer kthread\n");
		return PTR_ERR(pblk->writer_ts);
	}

	return 0;
}

static void pblk_writer_stop(struct pblk *pblk)
{
	/* The pipeline must be stopped and the write buffer emptied before the
	 * write thread is stopped
	 */
	WARN(pblk_rb_read_count(&pblk->rwb),
			"Stopping not fully persisted write buffer\n");

	WARN(pblk_rb_sync_count(&pblk->rwb),
			"Stopping not fully synced write buffer\n");

	if (pblk->writer_ts)
		kthread_stop(pblk->writer_ts);
	del_timer(&pblk->wtimer);
}

static void pblk_free(struct pblk *pblk)
{
	pblk_luns_free(pblk);
	pblk_lines_free(pblk);
	pblk_line_meta_free(pblk);
	pblk_core_free(pblk);
	pblk_l2p_free(pblk);

	kfree(pblk);
}

static void pblk_tear_down(struct pblk *pblk)
{
	pblk_pipeline_stop(pblk);
	pblk_writer_stop(pblk);
	pblk_rb_sync_l2p(&pblk->rwb);
	pblk_rwb_free(pblk);
	pblk_rl_free(&pblk->rl);

	pr_debug("pblk: consistent tear down\n");
}

static void pblk_exit(void *private)
{
	struct pblk *pblk = private;

	down_write(&pblk_lock);
	pblk_gc_exit(pblk);
	pblk_tear_down(pblk);
	pblk_free(pblk);
	up_write(&pblk_lock);
}

static sector_t pblk_capacity(void *private)
{
	struct pblk *pblk = private;

	return pblk->capacity * NR_PHY_IN_LOG;
}

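/* Target entry point, called by the LightNVM core when a pblk instance is
 * created on top of a device. Initialization order: LUNs, lines, core
 * resources, L2P table, line configuration/recovery, write thread and GC;
 * the error paths unwind in reverse order.
 */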
static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		       int flags)
{
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	struct pblk *pblk;
	int ret;

	if (dev->identity.dom & NVM_RSP_L2P) {
		pr_err("pblk: device-side L2P table not supported. (%x)\n",
							dev->identity.dom);
		return ERR_PTR(-EINVAL);
	}

	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
	if (!pblk)
		return ERR_PTR(-ENOMEM);

	pblk->dev = dev;
	pblk->disk = tdisk;
	pblk->state = PBLK_STATE_RUNNING;
	pblk->gc.gc_enabled = 0;

	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);

	if (flags & NVM_TARGET_FACTORY)
		pblk_setup_uuid(pblk);

#ifdef CONFIG_NVM_DEBUG
	atomic_long_set(&pblk->inflight_writes, 0);
	atomic_long_set(&pblk->padded_writes, 0);
	atomic_long_set(&pblk->padded_wb, 0);
	atomic_long_set(&pblk->nr_flush, 0);
	atomic_long_set(&pblk->req_writes, 0);
	atomic_long_set(&pblk->sub_writes, 0);
	atomic_long_set(&pblk->sync_writes, 0);
	atomic_long_set(&pblk->inflight_reads, 0);
	atomic_long_set(&pblk->cache_reads, 0);
	atomic_long_set(&pblk->sync_reads, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_gc_writes, 0);
	atomic_long_set(&pblk->recov_gc_reads, 0);
#endif

	atomic_long_set(&pblk->read_failed, 0);
	atomic_long_set(&pblk->read_empty, 0);
	atomic_long_set(&pblk->read_high_ecc, 0);
	atomic_long_set(&pblk->read_failed_gc, 0);
	atomic_long_set(&pblk->write_failed, 0);
	atomic_long_set(&pblk->erase_failed, 0);

	ret = pblk_luns_init(pblk, dev->luns);
	if (ret) {
		pr_err("pblk: could not initialize luns\n");
		goto fail;
	}

	ret = pblk_lines_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize lines\n");
		goto fail_free_luns;
	}

	ret = pblk_core_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize core\n");
		goto fail_free_line_meta;
	}

	ret = pblk_l2p_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize maps\n");
		goto fail_free_core;
	}

	ret = pblk_lines_configure(pblk, flags);
	if (ret) {
		pr_err("pblk: could not configure lines\n");
		goto fail_free_l2p;
	}

	ret = pblk_writer_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize write thread\n");
		goto fail_free_lines;
	}

	ret = pblk_gc_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize gc\n");
		goto fail_stop_writer;
	}

	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

	blk_queue_write_cache(tqueue, true, false);

	tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
	tqueue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);

	pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
			geo->nr_luns, pblk->l_mg.nr_lines,
			(unsigned long long)pblk->rl.nr_secs,
			pblk->rwb.nr_entries);

	wake_up_process(pblk->writer_ts);
	return pblk;

fail_stop_writer:
	pblk_writer_stop(pblk);
fail_free_lines:
	pblk_lines_free(pblk);
fail_free_l2p:
	pblk_l2p_free(pblk);
fail_free_core:
	pblk_core_free(pblk);
fail_free_line_meta:
	pblk_line_meta_free(pblk);
fail_free_luns:
	pblk_luns_free(pblk);
fail:
	kfree(pblk);
	return ERR_PTR(ret);
}

/* physical block device target */
static struct nvm_tgt_type tt_pblk = {
	.name		= "pblk",
	.version	= {1, 0, 0},

	.make_rq	= pblk_make_rq,
	.capacity	= pblk_capacity,

	.init		= pblk_init,
	.exit		= pblk_exit,

	.sysfs_init	= pblk_sysfs_init,
	.sysfs_exit	= pblk_sysfs_exit,
};

static int __init pblk_module_init(void)
{
	int ret;

	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
	if (!pblk_bio_set)
		return -ENOMEM;
	ret = nvm_register_tgt_type(&tt_pblk);
	if (ret)
		bioset_free(pblk_bio_set);
	return ret;
}

static void pblk_module_exit(void)
{
	bioset_free(pblk_bio_set);
	nvm_unregister_tgt_type(&tt_pblk);
}

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");