1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
4 */
5
6 #ifndef _NVMET_H
7 #define _NVMET_H
8
9 #include <linux/dma-mapping.h>
10 #include <linux/types.h>
11 #include <linux/device.h>
12 #include <linux/kref.h>
13 #include <linux/percpu-refcount.h>
14 #include <linux/list.h>
15 #include <linux/mutex.h>
16 #include <linux/uuid.h>
17 #include <linux/nvme.h>
18 #include <linux/configfs.h>
19 #include <linux/rcupdate.h>
20 #include <linux/blkdev.h>
21 #include <linux/radix-tree.h>
22
/* Number of entries in nvmet_ctrl.async_event_cmds[]. */
#define NVMET_ASYNC_EVENTS		4
/* Number of entries in nvmet_ctrl.slots[]. */
#define NVMET_ERROR_LOG_SLOTS		128
/*
 * All-ones u16 (0xffff).  Presumably the value of nvmet_req.error_loc when
 * no command field is at fault - confirm against the users of error_loc.
 */
#define NVMET_NO_ERROR_LOC		((u16)-1)
26
/*
 * Supported optional AENs (asynchronous event notifications), as bit masks
 * built from the NVME_AEN_CFG_* flags:
 */
#define NVMET_AEN_CFG_OPTIONAL \
	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
#define NVMET_DISC_AEN_CFG_OPTIONAL \
	(NVME_AEN_CFG_DISC_CHANGE)

/*
 * Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
 */
#define NVMET_AEN_CFG_ALL \
	(NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \
	 NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \
	 NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL)
42
/*
 * Helper macros for when the NVMe error is NVME_SC_CONNECT_INVALID_PARAM.
 *
 * Both yield a little-endian 32-bit value holding the byte offset of the
 * offending field @x.  IPO_IATTR_CONNECT_DATA() additionally sets bit 16,
 * the IATTR bit, which means the offset refers to the data section of
 * connect() (struct nvmf_connect_data); IPO_IATTR_CONNECT_SQE() leaves it
 * clear and takes the offset within struct nvmf_connect_command.
 *
 * @x is a member designator passed to offsetof(), so it cannot be
 * parenthesized.
 */
#define IPO_IATTR_CONNECT_DATA(x)	\
	(cpu_to_le32((1 << 16) | (offsetof(struct nvmf_connect_data, x))))
#define IPO_IATTR_CONNECT_SQE(x)	\
	(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
51
52 struct nvmet_ns {
53 struct list_head dev_link;
54 struct percpu_ref ref;
55 struct block_device *bdev;
56 struct file *file;
57 bool readonly;
58 u32 nsid;
59 u32 blksize_shift;
60 loff_t size;
61 u8 nguid[16];
62 uuid_t uuid;
63 u32 anagrpid;
64
65 bool buffered_io;
66 bool enabled;
67 struct nvmet_subsys *subsys;
68 const char *device_path;
69
70 struct config_group device_group;
71 struct config_group group;
72
73 struct completion disable_done;
74 mempool_t *bvec_pool;
75 struct kmem_cache *bvec_cache;
76
77 int use_p2pmem;
78 struct pci_dev *p2p_dev;
79 };
80
to_nvmet_ns(struct config_item * item)81 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
82 {
83 return container_of(to_config_group(item), struct nvmet_ns, group);
84 }
85
nvmet_ns_dev(struct nvmet_ns * ns)86 static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
87 {
88 return ns->bdev ? disk_to_dev(ns->bdev->bd_disk) : NULL;
89 }
90
91 struct nvmet_cq {
92 u16 qid;
93 u16 size;
94 };
95
96 struct nvmet_sq {
97 struct nvmet_ctrl *ctrl;
98 struct percpu_ref ref;
99 u16 qid;
100 u16 size;
101 u32 sqhd;
102 bool sqhd_disabled;
103 struct completion free_done;
104 struct completion confirm_done;
105 };
106
107 struct nvmet_ana_group {
108 struct config_group group;
109 struct nvmet_port *port;
110 u32 grpid;
111 };
112
to_ana_group(struct config_item * item)113 static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
114 {
115 return container_of(to_config_group(item), struct nvmet_ana_group,
116 group);
117 }
118
119 /**
120 * struct nvmet_port - Common structure to keep port
121 * information for the target.
122 * @entry: Entry into referrals or transport list.
123 * @disc_addr: Address information is stored in a format defined
124 * for a discovery log page entry.
125 * @group: ConfigFS group for this element's folder.
126 * @priv: Private data for the transport.
127 */
128 struct nvmet_port {
129 struct list_head entry;
130 struct nvmf_disc_rsp_page_entry disc_addr;
131 struct config_group group;
132 struct config_group subsys_group;
133 struct list_head subsystems;
134 struct config_group referrals_group;
135 struct list_head referrals;
136 struct list_head global_entry;
137 struct config_group ana_groups_group;
138 struct nvmet_ana_group ana_default_group;
139 enum nvme_ana_state *ana_state;
140 void *priv;
141 bool enabled;
142 int inline_data_size;
143 const struct nvmet_fabrics_ops *tr_ops;
144 };
145
to_nvmet_port(struct config_item * item)146 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
147 {
148 return container_of(to_config_group(item), struct nvmet_port,
149 group);
150 }
151
ana_groups_to_port(struct config_item * item)152 static inline struct nvmet_port *ana_groups_to_port(
153 struct config_item *item)
154 {
155 return container_of(to_config_group(item), struct nvmet_port,
156 ana_groups_group);
157 }
158
159 struct nvmet_ctrl {
160 struct nvmet_subsys *subsys;
161 struct nvmet_cq **cqs;
162 struct nvmet_sq **sqs;
163
164 bool cmd_seen;
165
166 struct mutex lock;
167 u64 cap;
168 u32 cc;
169 u32 csts;
170
171 uuid_t hostid;
172 u16 cntlid;
173 u32 kato;
174
175 struct nvmet_port *port;
176
177 u32 aen_enabled;
178 unsigned long aen_masked;
179 struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
180 unsigned int nr_async_event_cmds;
181 struct list_head async_events;
182 struct work_struct async_event_work;
183
184 struct list_head subsys_entry;
185 struct kref ref;
186 struct delayed_work ka_work;
187 struct work_struct fatal_err_work;
188
189 const struct nvmet_fabrics_ops *ops;
190
191 __le32 *changed_ns_list;
192 u32 nr_changed_ns;
193
194 char subsysnqn[NVMF_NQN_FIELD_LEN];
195 char hostnqn[NVMF_NQN_FIELD_LEN];
196
197 struct device *p2p_client;
198 struct radix_tree_root p2p_ns_map;
199
200 spinlock_t error_lock;
201 u64 err_counter;
202 struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
203 };
204
205 struct nvmet_subsys {
206 enum nvme_subsys_type type;
207
208 struct mutex lock;
209 struct kref ref;
210
211 struct list_head namespaces;
212 unsigned int nr_namespaces;
213 unsigned int max_nsid;
214
215 struct list_head ctrls;
216
217 struct list_head hosts;
218 bool allow_any_host;
219
220 u16 max_qid;
221
222 u64 ver;
223 u64 serial;
224 char *subsysnqn;
225
226 struct config_group group;
227
228 struct config_group namespaces_group;
229 struct config_group allowed_hosts_group;
230 };
231
to_subsys(struct config_item * item)232 static inline struct nvmet_subsys *to_subsys(struct config_item *item)
233 {
234 return container_of(to_config_group(item), struct nvmet_subsys, group);
235 }
236
namespaces_to_subsys(struct config_item * item)237 static inline struct nvmet_subsys *namespaces_to_subsys(
238 struct config_item *item)
239 {
240 return container_of(to_config_group(item), struct nvmet_subsys,
241 namespaces_group);
242 }
243
244 struct nvmet_host {
245 struct config_group group;
246 };
247
to_host(struct config_item * item)248 static inline struct nvmet_host *to_host(struct config_item *item)
249 {
250 return container_of(to_config_group(item), struct nvmet_host, group);
251 }
252
nvmet_host_name(struct nvmet_host * host)253 static inline char *nvmet_host_name(struct nvmet_host *host)
254 {
255 return config_item_name(&host->group.cg_item);
256 }
257
258 struct nvmet_host_link {
259 struct list_head entry;
260 struct nvmet_host *host;
261 };
262
263 struct nvmet_subsys_link {
264 struct list_head entry;
265 struct nvmet_subsys *subsys;
266 };
267
268 struct nvmet_req;
269 struct nvmet_fabrics_ops {
270 struct module *owner;
271 unsigned int type;
272 unsigned int msdbd;
273 bool has_keyed_sgls : 1;
274 void (*queue_response)(struct nvmet_req *req);
275 int (*add_port)(struct nvmet_port *port);
276 void (*remove_port)(struct nvmet_port *port);
277 void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
278 void (*disc_traddr)(struct nvmet_req *req,
279 struct nvmet_port *port, char *traddr);
280 u16 (*install_queue)(struct nvmet_sq *nvme_sq);
281 void (*discovery_chg)(struct nvmet_port *port);
282 };
283
/* Number of entries in nvmet_req.inline_bvec[]. */
#define NVMET_MAX_INLINE_BIOVEC	8
/*
 * NVMET_MAX_INLINE_BIOVEC pages worth of bytes.  The expansion is
 * parenthesized so that it behaves as a single operand next to
 * higher-precedence operators (e.g. "len / NVMET_MAX_INLINE_DATA_LEN").
 */
#define NVMET_MAX_INLINE_DATA_LEN	(NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE)
286
287 struct nvmet_req {
288 struct nvme_command *cmd;
289 struct nvme_completion *cqe;
290 struct nvmet_sq *sq;
291 struct nvmet_cq *cq;
292 struct nvmet_ns *ns;
293 struct scatterlist *sg;
294 struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
295 union {
296 struct {
297 struct bio inline_bio;
298 } b;
299 struct {
300 bool mpool_alloc;
301 struct kiocb iocb;
302 struct bio_vec *bvec;
303 struct work_struct work;
304 } f;
305 };
306 int sg_cnt;
307 /* data length as parsed from the command: */
308 size_t data_len;
309 /* data length as parsed from the SGL descriptor: */
310 size_t transfer_len;
311
312 struct nvmet_port *port;
313
314 void (*execute)(struct nvmet_req *req);
315 const struct nvmet_fabrics_ops *ops;
316
317 struct pci_dev *p2p_dev;
318 struct device *p2p_client;
319 u16 error_loc;
320 u64 error_slba;
321 };
322
/*
 * Workqueue defined elsewhere in the target.  Presumably it runs buffered
 * I/O work for namespaces with buffered_io set - confirm at the definition.
 */
extern struct workqueue_struct *buffered_io_wq;
324
/*
 * Store @result, converted to little endian, in the 32-bit result field of
 * the completion entry of @req.
 */
static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
{
	req->cqe->result.u32 = cpu_to_le32(result);
}
329
330 /*
331 * NVMe command writes actually are DMA reads for us on the target side.
332 */
333 static inline enum dma_data_direction
nvmet_data_dir(struct nvmet_req * req)334 nvmet_data_dir(struct nvmet_req *req)
335 {
336 return nvme_is_write(req->cmd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
337 }
338
339 struct nvmet_async_event {
340 struct list_head entry;
341 u8 event_type;
342 u8 event_info;
343 u8 log_page;
344 };
345
/*
 * Clear AEN bit @bn in the aen_masked mask of the controller that owns
 * @req, unless the command asks for the event to be retained.
 *
 * Bit 15 of command dword 10 is tested: when it is set (RAE, "Retain
 * Asynchronous Event" for Get Log Page) the mask is left untouched.
 */
static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn)
{
	u32 rae = le32_to_cpu(req->cmd->common.cdw10) & (1U << 15);

	if (!rae)
		clear_bit(bn, &req->sq->ctrl->aen_masked);
}
353
nvmet_aen_bit_disabled(struct nvmet_ctrl * ctrl,u32 bn)354 static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn)
355 {
356 if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn)))
357 return true;
358 return test_and_set_bit(bn, &ctrl->aen_masked);
359 }
360
361 void nvmet_get_feat_kato(struct nvmet_req *req);
362 void nvmet_get_feat_async_event(struct nvmet_req *req);
363 u16 nvmet_set_feat_kato(struct nvmet_req *req);
364 u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
365 void nvmet_execute_async_event(struct nvmet_req *req);
366
367 u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
368 void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
369 u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
370 u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
371 u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
372 u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
373 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
374
375 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
376 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
377 void nvmet_req_uninit(struct nvmet_req *req);
378 void nvmet_req_execute(struct nvmet_req *req);
379 void nvmet_req_complete(struct nvmet_req *req, u16 status);
380 int nvmet_req_alloc_sgl(struct nvmet_req *req);
381 void nvmet_req_free_sgl(struct nvmet_req *req);
382
383 void nvmet_execute_keep_alive(struct nvmet_req *req);
384
385 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
386 u16 size);
387 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
388 u16 size);
389 void nvmet_sq_destroy(struct nvmet_sq *sq);
390 int nvmet_sq_init(struct nvmet_sq *sq);
391
392 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
393
394 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
395 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
396 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
397 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
398 struct nvmet_req *req, struct nvmet_ctrl **ret);
399 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
400 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd);
401
402 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
403 enum nvme_subsys_type type);
404 void nvmet_subsys_put(struct nvmet_subsys *subsys);
405 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys);
406
407 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid);
408 void nvmet_put_namespace(struct nvmet_ns *ns);
409 int nvmet_ns_enable(struct nvmet_ns *ns);
410 void nvmet_ns_disable(struct nvmet_ns *ns);
411 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
412 void nvmet_ns_free(struct nvmet_ns *ns);
413
414 void nvmet_send_ana_event(struct nvmet_subsys *subsys,
415 struct nvmet_port *port);
416 void nvmet_port_send_ana_event(struct nvmet_port *port);
417
418 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
419 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
420
421 void nvmet_port_del_ctrls(struct nvmet_port *port,
422 struct nvmet_subsys *subsys);
423
424 int nvmet_enable_port(struct nvmet_port *port);
425 void nvmet_disable_port(struct nvmet_port *port);
426
427 void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port);
428 void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port);
429
430 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
431 size_t len);
432 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
433 size_t len);
434 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
435
436 u32 nvmet_get_log_page_len(struct nvme_command *cmd);
437 u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
438
439 extern struct list_head *nvmet_ports;
440 void nvmet_port_disc_changed(struct nvmet_port *port,
441 struct nvmet_subsys *subsys);
442 void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
443 struct nvmet_host *host);
444 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
445 u8 event_info, u8 log_page);
446
/* Queue sizing limits; NVMET_MAX_CMD is defined as the queue size. */
#define NVMET_QUEUE_SIZE	1024
#define NVMET_NR_QUEUES		128
#define NVMET_MAX_CMD		NVMET_QUEUE_SIZE

/*
 * Nice round number that makes a list of nsids fit into a page.
 * Should become tunable at some point in the future.
 */
#define NVMET_MAX_NAMESPACES	1024

/*
 * 0 is not a valid ANA group ID, so we start numbering at 1.
 *
 * ANA Group 1 exists without manual intervention, has namespaces assigned to it
 * by default, and is available in an optimized state through all ports.
 */
#define NVMET_MAX_ANAGRPS	128
#define NVMET_DEFAULT_ANA_GRPID	1

/*
 * Keep-alive parameters.  NOTE(review): units are not visible here apart
 * from the _MS suffix of NVMET_DISC_KATO_MS - confirm NVMET_KAS at its users.
 */
#define NVMET_KAS		10
#define NVMET_DISC_KATO_MS		120000
468
469 int __init nvmet_init_configfs(void);
470 void __exit nvmet_exit_configfs(void);
471
472 int __init nvmet_init_discovery(void);
473 void nvmet_exit_discovery(void);
474
475 extern struct nvmet_subsys *nvmet_disc_subsys;
476 extern struct rw_semaphore nvmet_config_sem;
477
478 extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
479 extern u64 nvmet_ana_chgcnt;
480 extern struct rw_semaphore nvmet_ana_sem;
481
482 bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn);
483
484 int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
485 int nvmet_file_ns_enable(struct nvmet_ns *ns);
486 void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
487 void nvmet_file_ns_disable(struct nvmet_ns *ns);
488 u16 nvmet_bdev_flush(struct nvmet_req *req);
489 u16 nvmet_file_flush(struct nvmet_req *req);
490 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
491
nvmet_rw_len(struct nvmet_req * req)492 static inline u32 nvmet_rw_len(struct nvmet_req *req)
493 {
494 return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
495 req->ns->blksize_shift;
496 }
497
498 u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
499
500 /* Convert a 32-bit number to a 16-bit 0's based number */
to0based(u32 a)501 static inline __le16 to0based(u32 a)
502 {
503 return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
504 }
505
506 #endif /* _NVMET_H */
507