// SPDX-License-Identifier: GPL-2.0
/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/io.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");

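/* Free a preallocated request: its AOB page, request array and header. */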
static void __scm_free_rq(struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);

	free_page((unsigned long) scmrq->aob);
	kfree(scmrq->request);
	kfree(aobrq);
}

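/* Free all requests on the inactive list and destroy the aidaw mempool. */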
static void scm_free_rqs(void)
{
	struct list_head *iter, *safe;
	struct scm_request *scmrq;

	spin_lock_irq(&list_lock);
	list_for_each_safe(iter, safe, &inactive_requests) {
		scmrq = list_entry(iter, struct scm_request, list);
		list_del(&scmrq->list);
		__scm_free_rq(scmrq);
	}
	spin_unlock_irq(&list_lock);

	mempool_destroy(aidaw_pool);
}

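/*
 * Allocate one scm_request together with its AOB request header, a zeroed
 * DMA page for the AOB and the per-IO request array, and put it on the
 * inactive list.
 */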
static int __scm_alloc_rq(void)
{
	struct aob_rq_header *aobrq;
	struct scm_request *scmrq;

	aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
	if (!aobrq)
		return -ENOMEM;

	scmrq = (void *) aobrq->data;
	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
	if (!scmrq->aob)
		goto free;

	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
				 GFP_KERNEL);
	if (!scmrq->request)
		goto free;

	INIT_LIST_HEAD(&scmrq->list);
	spin_lock_irq(&list_lock);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irq(&list_lock);

	return 0;
free:
	__scm_free_rq(scmrq);
	return -ENOMEM;
}

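/* Create the aidaw mempool and preallocate nrqs requests. */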
static int scm_alloc_rqs(unsigned int nrqs)
{
	int ret = 0;

	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
	if (!aidaw_pool)
		return -ENOMEM;

	while (nrqs-- && !ret)
		ret = __scm_alloc_rq();

	return ret;
}

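/* Take a request off the inactive list; returns NULL if none is available. */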
static struct scm_request *scm_request_fetch(void)
{
	struct scm_request *scmrq = NULL;

	spin_lock_irq(&list_lock);
	if (list_empty(&inactive_requests))
		goto out;
	scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
	list_del(&scmrq->list);
out:
	spin_unlock_irq(&list_lock);
	return scmrq;
}

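/*
 * Give back any aidaw pages used for indirect addressing to the mempool and
 * return the request to the inactive list.
 */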
static void scm_request_done(struct scm_request *scmrq)
{
	unsigned long flags;
	struct msb *msb;
	u64 aidaw;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		msb = &scmrq->aob->msb[i];
		aidaw = (u64)phys_to_virt(msb->data_addr);

		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
		    IS_ALIGNED(aidaw, PAGE_SIZE))
			mempool_free(virt_to_page(aidaw), aidaw_pool);
	}

	spin_lock_irqsave(&list_lock, flags);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irqrestore(&list_lock, flags);
}

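/* Writes are refused while the device is in the SCM_WR_PROHIBIT state. */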
static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

	return page ? page_address(page) : NULL;
}

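/*
 * Number of data bytes that can still be described by aidaw entries on the
 * current page: one PAGE_SIZE block per remaining entry.
 */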
static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
	unsigned long _aidaw = (unsigned long) aidaw;
	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}

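/*
 * Return an aidaw list with room for at least @bytes: reuse the current
 * position if it still has enough space, otherwise grab a fresh zeroed page
 * from the mempool.
 */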
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
	struct aidaw *aidaw;

	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
		return scmrq->next_aidaw;

	aidaw = scm_aidaw_alloc();
	if (aidaw)
		memset(aidaw, 0, PAGE_SIZE);
	return aidaw;
}

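/*
 * Fill in the next msb of the AOB for the request at the current position:
 * set block size, device address, operation code and the indirect data
 * address list built from the request's segments.
 */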
static int scm_request_prepare(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	struct scm_device *scmdev = bdev->gendisk->private_data;
	int pos = scmrq->aob->request.msb_count;
	struct msb *msb = &scmrq->aob->msb[pos];
	struct request *req = scmrq->request[pos];
	struct req_iterator iter;
	struct aidaw *aidaw;
	struct bio_vec bv;

	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
	if (!aidaw)
		return -ENOMEM;

	msb->bs = MSB_BS_4K;
	scmrq->aob->request.msb_count++;
	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
	msb->flags |= MSB_FLAG_IDA;
	msb->data_addr = (u64)virt_to_phys(aidaw);

	rq_for_each_segment(bv, req, iter) {
		WARN_ON(bv.bv_offset);
		msb->blk_count += bv.bv_len >> 12;
		aidaw->data_addr = virt_to_phys(page_address(bv.bv_page));
		aidaw++;
	}

	scmrq->next_aidaw = aidaw;
	return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
				   struct request *req)
{
	scmrq->request[scmrq->aob->request.msb_count] = req;
}

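/*
 * (Re)initialize a fetched request: clear its request array and AOB, set up
 * the ARQB move command and start aidaw placement right behind the msb array.
 */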
static inline void scm_request_init(struct scm_blk_dev *bdev,
				    struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);
	struct aob *aob = scmrq->aob;

	memset(scmrq->request, 0,
	       nr_requests_per_io * sizeof(scmrq->request[0]));
	memset(aob, 0, sizeof(*aob));
	aobrq->scmdev = bdev->scmdev;
	aob->request.cmd_code = ARQB_CMD_MOVE;
	aob->request.data = (u64) aobrq;
	scmrq->bdev = bdev;
	scmrq->retries = 4;
	scmrq->error = BLK_STS_OK;
	/* We don't use all msbs - place aidaws at the end of the aob page. */
	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}

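/* Requeue all block layer requests bundled in this scm_request. */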
static void scm_request_requeue(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
		blk_mq_requeue_request(scmrq->request[i], false);

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
	blk_mq_kick_requeue_list(bdev->rq);
}

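/* Complete all bundled block layer requests with the scm_request's status. */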
static void scm_request_finish(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	blk_status_t *error;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		error = blk_mq_rq_to_pdu(scmrq->request[i]);
		*error = scmrq->error;
		if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
			blk_mq_complete_request(scmrq->request[i]);
	}

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
}

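/* Submit the AOB via the EADM subchannel; requeue if no subchannel is free. */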
static void scm_request_start(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;

	atomic_inc(&bdev->queued_reqs);
	if (eadm_start_aob(scmrq->aob)) {
		SCM_LOG(5, "no subchannel");
		scm_request_requeue(scmrq);
	}
}

struct scm_queue {
	struct scm_request *scmrq;
	spinlock_t lock;
};

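/*
 * blk-mq queue_rq handler: collect up to nr_requests_per_io requests in one
 * scm_request and start it once the bundle is full or this is the last
 * request in the batch.
 */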
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *qd)
{
	struct scm_device *scmdev = hctx->queue->queuedata;
	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
	struct scm_queue *sq = hctx->driver_data;
	struct request *req = qd->rq;
	struct scm_request *scmrq;

	spin_lock(&sq->lock);
	if (!scm_permit_request(bdev, req)) {
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}

	scmrq = sq->scmrq;
	if (!scmrq) {
		scmrq = scm_request_fetch();
		if (!scmrq) {
			SCM_LOG(5, "no request");
			spin_unlock(&sq->lock);
			return BLK_STS_RESOURCE;
		}
		scm_request_init(bdev, scmrq);
		sq->scmrq = scmrq;
	}
	scm_request_set(scmrq, req);

	if (scm_request_prepare(scmrq)) {
		SCM_LOG(5, "aidaw alloc failed");
		scm_request_set(scmrq, NULL);

		if (scmrq->aob->request.msb_count)
			scm_request_start(scmrq);

		sq->scmrq = NULL;
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}
	blk_mq_start_request(req);

	if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
		scm_request_start(scmrq);
		sq->scmrq = NULL;
	}
	spin_unlock(&sq->lock);
	return BLK_STS_OK;
}

static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			     unsigned int idx)
{
	struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

	if (!qd)
		return -ENOMEM;

	spin_lock_init(&qd->lock);
	hctx->driver_data = qd;

	return 0;
}

static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct scm_queue *qd = hctx->driver_data;

	WARN_ON(qd->scmrq);
	kfree(hctx->driver_data);
	hctx->driver_data = NULL;
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
	struct aob *aob = scmrq->aob;

	if (scmrq->error == BLK_STS_TIMEOUT)
		SCM_LOG(1, "Request timeout");
	else {
		SCM_LOG(1, "Request error");
		SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
	}
	if (scmrq->retries)
		SCM_LOG(1, "Retry request");
	else
		pr_err("An I/O operation to SCM failed with rc=%d\n",
		       scmrq->error);
}

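/*
 * Handle a failed request: suspend write access on EQC_WR_PROHIBIT,
 * otherwise retry by restarting the AOB; requeue if that is not possible.
 */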
static void scm_blk_handle_error(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	unsigned long flags;

	if (scmrq->error != BLK_STS_IOERR)
		goto restart;

	/* For -EIO the response block is valid. */
	switch (scmrq->aob->response.eqc) {
	case EQC_WR_PROHIBIT:
		spin_lock_irqsave(&bdev->lock, flags);
		if (bdev->state != SCM_WR_PROHIBIT)
			pr_info("%lx: Write access to the SCM increment is suspended\n",
				(unsigned long) bdev->scmdev->address);
		bdev->state = SCM_WR_PROHIBIT;
		spin_unlock_irqrestore(&bdev->lock, flags);
		goto requeue;
	default:
		break;
	}

restart:
	if (!eadm_start_aob(scmrq->aob))
		return;

requeue:
	scm_request_requeue(scmrq);
}

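/* Interrupt handler called by the EADM layer when an AOB has completed. */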
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
	struct scm_request *scmrq = data;

	scmrq->error = error;
	if (error) {
		__scmrq_log_error(scmrq);
		if (scmrq->retries-- > 0) {
			scm_blk_handle_error(scmrq);
			return;
		}
	}

	scm_request_finish(scmrq);
}

static void scm_blk_request_done(struct request *req)
{
	blk_status_t *error = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, *error);
}

static const struct block_device_operations scm_blk_devops = {
	.owner = THIS_MODULE,
};

static const struct blk_mq_ops scm_mq_ops = {
	.queue_rq = scm_blk_request,
	.complete = scm_blk_request_done,
	.init_hctx = scm_blk_init_hctx,
	.exit_hctx = scm_blk_exit_hctx,
};

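/*
 * Set up the blk-mq tag set, gendisk and request queue for a new SCM device
 * and register the disk as scm[a-z[a-z]].
 */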
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
	unsigned int devindex, nr_max_blk;
	struct request_queue *rq;
	int len, ret;

	devindex = atomic_inc_return(&nr_devices) - 1;
	/* scma..scmz + scmaa..scmzz */
	if (devindex > 701) {
		ret = -ENODEV;
		goto out;
	}

	bdev->scmdev = scmdev;
	bdev->state = SCM_OPER;
	spin_lock_init(&bdev->lock);
	atomic_set(&bdev->queued_reqs, 0);

	bdev->tag_set.ops = &scm_mq_ops;
	bdev->tag_set.cmd_size = sizeof(blk_status_t);
	bdev->tag_set.nr_hw_queues = nr_requests;
	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	bdev->tag_set.numa_node = NUMA_NO_NODE;

	ret = blk_mq_alloc_tag_set(&bdev->tag_set);
	if (ret)
		goto out;

	bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, scmdev);
	if (IS_ERR(bdev->gendisk)) {
		ret = PTR_ERR(bdev->gendisk);
		goto out_tag;
	}
	rq = bdev->rq = bdev->gendisk->queue;
	nr_max_blk = min(scmdev->nr_max_block,
			 (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

	blk_queue_logical_block_size(rq, 1 << 12);
	blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
	blk_queue_max_segments(rq, nr_max_blk);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);

	bdev->gendisk->private_data = scmdev;
	bdev->gendisk->fops = &scm_blk_devops;
	bdev->gendisk->major = scm_major;
	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;
	bdev->gendisk->minors = SCM_NR_PARTS;

	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
	if (devindex > 25) {
		len += snprintf(bdev->gendisk->disk_name + len,
				DISK_NAME_LEN - len, "%c",
				'a' + (devindex / 26) - 1);
		devindex = devindex % 26;
	}
	snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
		 'a' + devindex);

	/* 512 byte sectors */
	set_capacity(bdev->gendisk, scmdev->size >> 9);
	device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
	return 0;

out_tag:
	blk_mq_free_tag_set(&bdev->tag_set);
out:
	atomic_dec(&nr_devices);
	return ret;
}

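/* Tear down the gendisk, request queue and tag set of an SCM device. */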
void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
	del_gendisk(bdev->gendisk);
	blk_cleanup_disk(bdev->gendisk);
	blk_mq_free_tag_set(&bdev->tag_set);
}

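/* Restore write access after a previous EQC_WR_PROHIBIT condition. */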
void scm_blk_set_available(struct scm_blk_dev *bdev)
{
	unsigned long flags;

	spin_lock_irqsave(&bdev->lock, flags);
	if (bdev->state == SCM_WR_PROHIBIT)
		pr_info("%lx: Write access to the SCM increment is restored\n",
			(unsigned long) bdev->scmdev->address);
	bdev->state = SCM_OPER;
	spin_unlock_irqrestore(&bdev->lock, flags);
}

static bool __init scm_blk_params_valid(void)
{
	if (!nr_requests_per_io || nr_requests_per_io > 64)
		return false;

	return true;
}

static int __init scm_blk_init(void)
{
	int ret = -EINVAL;

	if (!scm_blk_params_valid())
		goto out;

	ret = register_blkdev(0, "scm");
	if (ret < 0)
		goto out;

	scm_major = ret;
	ret = scm_alloc_rqs(nr_requests);
	if (ret)
		goto out_free;

	scm_debug = debug_register("scm_log", 16, 1, 16);
	if (!scm_debug) {
		ret = -ENOMEM;
		goto out_free;
	}

	debug_register_view(scm_debug, &debug_hex_ascii_view);
	debug_set_level(scm_debug, 2);

	ret = scm_drv_init();
	if (ret)
		goto out_dbf;

	return ret;

out_dbf:
	debug_unregister(scm_debug);
out_free:
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
out:
	return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
	scm_drv_cleanup();
	debug_unregister(scm_debug);
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);