/*
 * Copyright (c) 2017-2018 Christoph Hellwig.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"

static bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

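/*
 * The freeze helpers below mirror controller-level queue freezing for the
 * shared multipath request queues: they walk every namespace head in the
 * subsystem and start, wait for, or release a freeze on its queue.  All of
 * them rely on the caller holding subsys->lock, which is what the lockdep
 * assertions check.
 */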
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_unfreeze_queue(h->disk->queue);
}

void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_freeze_queue_wait(h->disk->queue);
}

void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_freeze_queue_start(h->disk->queue);
}

/*
 * If multipathing is enabled we need to always use the subsystem instance
 * number for numbering our devices to avoid conflicts between subsystems that
 * have multiple controllers and thus use the multipath-aware subsystem node
 * and those that have a single controller and use the controller node
 * directly.
 */
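/*
 * For example, a namespace head with instance 1 reached through the
 * controller with NVM subsystem controller ID 2 in subsystem 0 gets the
 * hidden per-path node nvme0c2n1, while the shared multipath node created
 * in nvme_mpath_alloc_disk() is nvme0n1.  With multipath disabled the name
 * is derived from the controller instance instead, e.g. nvme2n1.
 */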
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
			struct nvme_ctrl *ctrl, int *flags)
{
	if (!multipath) {
		sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
	} else if (ns->head->disk) {
		sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
				ctrl->cntlid, ns->head->instance);
		*flags = GENHD_FL_HIDDEN;
	} else {
		sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
				ns->head->instance);
	}
}

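/*
 * Decide whether a failed request should be retried on another path.  ANA
 * errors and host path errors steal all bios off the request and park them
 * on the head's requeue list for resubmission through a different path;
 * returning false leaves the error to the normal completion path instead.
 */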
bool nvme_failover_req(struct request *req)
{
	struct nvme_ns *ns = req->q->queuedata;
	u16 status = nvme_req(req)->status;
	unsigned long flags;

	switch (status & 0x7ff) {
	case NVME_SC_ANA_TRANSITION:
	case NVME_SC_ANA_INACCESSIBLE:
	case NVME_SC_ANA_PERSISTENT_LOSS:
		/*
		 * If we got back an ANA error we know the controller is alive,
		 * but not ready to serve this namespace.  The spec suggests
		 * we should update our general state here, but due to the fact
		 * that the admin and I/O queues are not serialized that is
		 * fundamentally racy.  So instead just clear the current path,
		 * mark the path as pending and kick off a re-read of the ANA
		 * log page ASAP.
		 */
		nvme_mpath_clear_current_path(ns);
		if (ns->ctrl->ana_log_buf) {
			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
			queue_work(nvme_wq, &ns->ctrl->ana_work);
		}
		break;
	case NVME_SC_HOST_PATH_ERROR:
		/*
		 * Temporary transport disruption in talking to the controller.
		 * Try to send on a new path.
		 */
		nvme_mpath_clear_current_path(ns);
		break;
	default:
		/* This was a non-ANA error so follow the normal error path. */
		return false;
	}

	spin_lock_irqsave(&ns->head->requeue_lock, flags);
	blk_steal_bios(&ns->head->requeue_list, req);
	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
	blk_mq_end_request(req, 0);

	kblockd_schedule_work(&ns->head->requeue_work);
	return true;
}

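/*
 * Schedule the requeue work for every shared namespace node of this
 * controller so that bios parked on the requeue lists get resubmitted,
 * typically after a path has come (back) online.
 */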
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->head->disk)
			kblockd_schedule_work(&ns->head->requeue_work);
	}
	up_read(&ctrl->namespaces_rwsem);
}

static const char *nvme_ana_state_names[] = {
	[0]				= "invalid state",
	[NVME_ANA_OPTIMIZED]		= "optimized",
	[NVME_ANA_NONOPTIMIZED]		= "non-optimized",
	[NVME_ANA_INACCESSIBLE]		= "inaccessible",
	[NVME_ANA_PERSISTENT_LOSS]	= "persistent-loss",
	[NVME_ANA_CHANGE]		= "change",
};

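/*
 * Pick a usable path for this namespace head: prefer an ANA-optimized
 * namespace on a live controller and cache it in head->current_path,
 * falling back to a non-optimized one if that is all we have.  The RCU
 * list walk relies on the caller holding head->srcu.
 */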
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns, *fallback = NULL;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		if (ns->ctrl->state != NVME_CTRL_LIVE ||
		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
			continue;
		switch (ns->ana_state) {
		case NVME_ANA_OPTIMIZED:
			rcu_assign_pointer(head->current_path, ns);
			return ns;
		case NVME_ANA_NONOPTIMIZED:
			fallback = ns;
			break;
		default:
			break;
		}
	}

	if (fallback)
		rcu_assign_pointer(head->current_path, fallback);
	return fallback;
}

static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
	return ns->ctrl->state == NVME_CTRL_LIVE &&
		ns->ana_state == NVME_ANA_OPTIMIZED;
}

inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);

	if (unlikely(!ns || !nvme_path_is_optimized(ns)))
		ns = __nvme_find_path(head);
	return ns;
}

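/*
 * Bio submission entry point for the shared multipath node.  Pick a path
 * under SRCU and redirect the bio to its per-path disk; if no path is
 * usable but sibling namespaces still exist, park the bio on the requeue
 * list, otherwise fail it with an I/O error.
 */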
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
		struct bio *bio)
{
	struct nvme_ns_head *head = q->queuedata;
	struct device *dev = disk_to_dev(head->disk);
	struct nvme_ns *ns;
	blk_qc_t ret = BLK_QC_T_NONE;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (likely(ns)) {
		bio->bi_disk = ns->disk;
		bio->bi_opf |= REQ_NVME_MPATH;
		trace_block_bio_remap(bio->bi_disk->queue, bio,
				      disk_devt(ns->head->disk),
				      bio->bi_iter.bi_sector);
		ret = direct_make_request(bio);
	} else if (!list_empty_careful(&head->list)) {
		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		dev_warn_ratelimited(dev, "no path - failing I/O\n");

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
	}

	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
{
	struct nvme_ns_head *head = q->queuedata;
	struct nvme_ns *ns;
	bool found = false;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = srcu_dereference(head->current_path, &head->srcu);
	if (likely(ns && nvme_path_is_optimized(ns)))
		found = ns->queue->poll_fn(q, qc);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return found;
}

static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, requeue_work);
	struct bio *bio, *next;

	spin_lock_irq(&head->requeue_lock);
	next = bio_list_get(&head->requeue_list);
	spin_unlock_irq(&head->requeue_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		/*
		 * Reset disk to the mpath node and resubmit to select a new
		 * path.
		 */
		bio->bi_disk = head->disk;
		generic_make_request(bio);
	}
}

int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
	struct request_queue *q;
	bool vwc = false;

	mutex_init(&head->lock);
	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);

	/*
	 * Add a multipath node if the subsystem supports multiple controllers.
	 * We also do this for private namespaces as the namespace sharing data
	 * could change after a rescan.
	 */
	if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
		return 0;

	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
	if (!q)
		goto out;
	q->queuedata = head;
	blk_queue_make_request(q, nvme_ns_head_make_request);
	q->poll_fn = nvme_ns_head_poll;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	/* set to a default value of 512 until the disk is validated */
	blk_queue_logical_block_size(q, 512);
	blk_set_stacking_limits(&q->limits);

	/* we need to propagate the VWC setting up the stack */
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
	blk_queue_write_cache(q, vwc, vwc);

	head->disk = alloc_disk(0);
	if (!head->disk)
		goto out_cleanup_queue;
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;
	head->disk->queue = q;
	head->disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);
	return 0;

out_cleanup_queue:
	blk_cleanup_queue(q);
out:
	return -ENOMEM;
}

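/*
 * Called once a path in a live ANA state is available: register the shared
 * gendisk the first time this happens and kick the requeue work so bios
 * parked while no path was usable get resubmitted.
 */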
static void nvme_mpath_set_live(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	lockdep_assert_held(&ns->head->lock);

	if (!head->disk)
		return;

	if (!(head->disk->flags & GENHD_FL_UP)) {
		device_add_disk(&head->subsys->dev, head->disk);
		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
				&nvme_ns_id_attr_group))
			dev_warn(&head->subsys->dev,
				 "failed to create id group.\n");
	}

	synchronize_srcu(&ns->head->srcu);
	kblockd_schedule_work(&ns->head->requeue_work);
}

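/*
 * Walk the ANA log page in ctrl->ana_log_buf: the response header is
 * followed by one descriptor per group, each immediately followed by its
 * nnsids NSIDs.  Sanity-check every descriptor against the log size and the
 * controller limits, then hand it to the callback.  Must be called with
 * ctrl->ana_lock held.
 */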
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
		int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
			void *))
{
	void *base = ctrl->ana_log_buf;
	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
	int error, i;

	lockdep_assert_held(&ctrl->ana_lock);

	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
		struct nvme_ana_group_desc *desc = base + offset;
		u32 nr_nsids = le32_to_cpu(desc->nnsids);
		size_t nsid_buf_size = nr_nsids * sizeof(__le32);

		if (WARN_ON_ONCE(desc->grpid == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
			return -EINVAL;

		offset += sizeof(*desc);
		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
			return -EINVAL;

		error = cb(ctrl, desc, data);
		if (error)
			return error;

		offset += nsid_buf_size;
		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
			return -EINVAL;
	}

	return 0;
}

static inline bool nvme_state_is_live(enum nvme_ana_state state)
{
	return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
}

static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
		struct nvme_ns *ns)
{
	mutex_lock(&ns->head->lock);
	ns->ana_grpid = le32_to_cpu(desc->grpid);
	ns->ana_state = desc->state;
	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);

	if (nvme_state_is_live(ns->ana_state))
		nvme_mpath_set_live(ns);
	mutex_unlock(&ns->head->lock);
}

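/*
 * Per-group callback for nvme_parse_ana_log(): count groups in the change
 * state and apply the descriptor's state to every namespace it lists.  The
 * single-pass matching below relies on both desc->nsids[] and the
 * controller's namespace list being sorted in ascending NSID order.
 */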
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
	unsigned *nr_change_groups = data;
	struct nvme_ns *ns;

	dev_info(ctrl->device, "ANA group %d: %s.\n",
			le32_to_cpu(desc->grpid),
			nvme_ana_state_names[desc->state]);

	if (desc->state == NVME_ANA_CHANGE)
		(*nr_change_groups)++;

	if (!nr_nsids)
		return 0;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		unsigned nsid = le32_to_cpu(desc->nsids[n]);

		if (ns->head->ns_id < nsid)
			continue;
		if (ns->head->ns_id == nsid)
			nvme_update_ns_ana_state(desc, ns);
		if (++n == nr_nsids)
			break;
	}
	up_read(&ctrl->namespaces_rwsem);
	return 0;
}

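/*
 * Fetch the ANA log page (optionally with the RGO bit set to return group
 * descriptors only) and run it through nvme_parse_ana_log() to update the
 * per-namespace states.  While any group is still in the change state, arm
 * the ANATT timer so a stuck transition eventually resets the controller.
 */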
static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
{
	u32 nr_change_groups = 0;
	int error;

	mutex_lock(&ctrl->ana_lock);
	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
			groups_only ? NVME_ANA_LOG_RGO : 0,
			ctrl->ana_log_buf, ctrl->ana_log_size, 0);
	if (error) {
		dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
		goto out_unlock;
	}

	error = nvme_parse_ana_log(ctrl, &nr_change_groups,
			nvme_update_ana_state);
	if (error)
		goto out_unlock;

	/*
	 * In theory we should have an ANATT timer per group as they might enter
	 * the change state at different times.  But that is a lot of overhead
	 * just to protect against a target that keeps entering new change
	 * states while never finishing previous ones.  We'll still eventually
	 * time out once all groups are in change state, so this isn't a big
	 * deal.
	 *
	 * We also double the ANATT value to provide some slack for transports
	 * or AEN processing overhead.
	 */
	if (nr_change_groups)
		mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
	else
		del_timer_sync(&ctrl->anatt_timer);
out_unlock:
	mutex_unlock(&ctrl->ana_lock);
	return error;
}

static void nvme_ana_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);

	nvme_read_ana_log(ctrl, false);
}

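/*
 * An ANA transition did not complete within (twice) the ANATT limit the
 * controller advertised, so treat the controller as broken and reset it.
 */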
static void nvme_anatt_timeout(struct timer_list *t)
{
	struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);

	dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
	nvme_reset_ctrl(ctrl);
}

void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
	if (!nvme_ctrl_use_ana(ctrl))
		return;
	del_timer_sync(&ctrl->anatt_timer);
	cancel_work_sync(&ctrl->ana_work);
}

static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
}
DEVICE_ATTR_RO(ana_grpid);

static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);

	return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
}
DEVICE_ATTR_RO(ana_state);

static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	struct nvme_ana_group_desc *dst = data;

	if (desc->grpid != dst->grpid)
		return 0;

	*dst = *desc;
	return -ENXIO; /* just break out of the loop */
}

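/*
 * Set up the initial ANA state for a freshly scanned namespace: on
 * ANA-enabled controllers look up its group descriptor in the cached log
 * page, otherwise treat the single path as optimized right away.  The
 * stable-pages requirement is propagated to the multipath node if the
 * per-path device needs it.
 */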
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	if (nvme_ctrl_use_ana(ns->ctrl)) {
		struct nvme_ana_group_desc desc = {
			.grpid = id->anagrpid,
			.state = 0,
		};

		mutex_lock(&ns->ctrl->ana_lock);
		ns->ana_grpid = le32_to_cpu(id->anagrpid);
		nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
		mutex_unlock(&ns->ctrl->ana_lock);
		if (desc.state) {
			/* found the group desc: update */
			nvme_update_ns_ana_state(&desc, ns);
		}
	} else {
		mutex_lock(&ns->head->lock);
		ns->ana_state = NVME_ANA_OPTIMIZED;
		nvme_mpath_set_live(ns);
		mutex_unlock(&ns->head->lock);
	}

	if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
		struct gendisk *disk = ns->head->disk;

		if (disk)
			disk->queue->backing_dev_info->capabilities |=
					BDI_CAP_STABLE_WRITES;
	}
}

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;
	if (head->disk->flags & GENHD_FL_UP) {
		sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
				   &nvme_ns_id_attr_group);
		del_gendisk(head->disk);
	}
	blk_set_queue_dying(head->disk->queue);
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	blk_cleanup_queue(head->disk->queue);
	put_disk(head->disk);
}

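/*
 * Read the ANA-related Identify Controller fields and allocate a log buffer
 * large enough for the worst case: the response header plus one descriptor
 * per ANA group plus one NSID entry per attached namespace.  If that buffer
 * would exceed MDTS, ANA support is disabled rather than failing controller
 * initialization.
 */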
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
	int error;

	/* check if multipath is enabled and we have the capability */
	if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
		return 0;

	ctrl->anacap = id->anacap;
	ctrl->anatt = id->anatt;
	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);

	mutex_init(&ctrl->ana_lock);
	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
	ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);

	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
		dev_err(ctrl->device,
			"ANA log page size (%zd) larger than MDTS (%d).\n",
			ctrl->ana_log_size,
			ctrl->max_hw_sectors << SECTOR_SHIFT);
		dev_err(ctrl->device, "disabling ANA support.\n");
		return 0;
	}

	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
	kfree(ctrl->ana_log_buf);
	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
	if (!ctrl->ana_log_buf) {
		error = -ENOMEM;
		goto out;
	}

	error = nvme_read_ana_log(ctrl, false);
	if (error)
		goto out_free_ana_log_buf;
	return 0;
out_free_ana_log_buf:
	kfree(ctrl->ana_log_buf);
	ctrl->ana_log_buf = NULL;
out:
	return error;
}

void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
	kfree(ctrl->ana_log_buf);
	ctrl->ana_log_buf = NULL;
}