Lines Matching +full:permit +full:-
1 // SPDX-License-Identifier: GPL-2.0-or-later
5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
17 #include "rtrs-clt.h"
18 #include "rtrs-log.h"
53 list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) in rtrs_clt_is_connected()
54 connected |= READ_ONCE(sess->state) == RTRS_CLT_CONNECTED; in rtrs_clt_is_connected()
63 size_t max_depth = clt->queue_depth; in __rtrs_get_permit()
64 struct rtrs_permit *permit; in __rtrs_get_permit() local
75 bit = find_first_zero_bit(clt->permits_map, max_depth); in __rtrs_get_permit()
78 } while (unlikely(test_and_set_bit_lock(bit, clt->permits_map))); in __rtrs_get_permit()
80 permit = get_permit(clt, bit); in __rtrs_get_permit()
81 WARN_ON(permit->mem_id != bit); in __rtrs_get_permit()
82 permit->cpu_id = raw_smp_processor_id(); in __rtrs_get_permit()
83 permit->con_type = con_type; in __rtrs_get_permit()
85 return permit; in __rtrs_get_permit()
89 struct rtrs_permit *permit) in __rtrs_put_permit() argument
91 clear_bit_unlock(permit->mem_id, clt->permits_map); in __rtrs_put_permit()
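The pair above implements a lock-free permit allocator: __rtrs_get_permit() scans the bitmap for a free bit and claims it with test_and_set_bit_lock(), retrying if another CPU wins the race, and __rtrs_put_permit() releases it with clear_bit_unlock(). A minimal standalone sketch of the same claim/release pattern (map and depth are hypothetical stand-ins for clt->permits_map and clt->queue_depth):

#include <linux/bitops.h>

/* Sketch: claim a free slot in a bitmap without taking a lock. */
static int claim_slot(unsigned long *map, size_t depth)
{
	int bit;

	do {
		bit = find_first_zero_bit(map, depth);
		if (bit >= depth)
			return -1;	/* all slots busy */
		/* test_and_set_bit_lock() returns non-zero if we lost the race */
	} while (test_and_set_bit_lock(bit, map));

	return bit;	/* slot is now exclusively ours */
}

static void release_slot(unsigned long *map, int bit)
{
	/* release semantics, pairs with test_and_set_bit_lock() */
	clear_bit_unlock(bit, map);
}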
95 * rtrs_clt_get_permit() - allocates permit for future RDMA operation
97 * @con_type: Type of connection to use with the permit
101 * Allocates permit for the following RDMA operation. Permit is used
112 struct rtrs_permit *permit; in rtrs_clt_get_permit() local
115 permit = __rtrs_get_permit(clt, con_type); in rtrs_clt_get_permit()
116 if (likely(permit) || !can_wait) in rtrs_clt_get_permit()
117 return permit; in rtrs_clt_get_permit()
120 prepare_to_wait(&clt->permits_wait, &wait, in rtrs_clt_get_permit()
122 permit = __rtrs_get_permit(clt, con_type); in rtrs_clt_get_permit()
123 if (likely(permit)) in rtrs_clt_get_permit()
129 finish_wait(&clt->permits_wait, &wait); in rtrs_clt_get_permit()
131 return permit; in rtrs_clt_get_permit()
136 * rtrs_clt_put_permit() - puts allocated permit
138 * @permit: Permit to be freed
143 void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit) in rtrs_clt_put_permit() argument
145 if (WARN_ON(!test_bit(permit->mem_id, clt->permits_map))) in rtrs_clt_put_permit()
148 __rtrs_put_permit(clt, permit); in rtrs_clt_put_permit()
151 * rtrs_clt_get_permit() adds itself to the &clt->permits_wait list in rtrs_clt_put_permit()
153 * it must have added itself to &clt->permits_wait before in rtrs_clt_put_permit()
157 if (waitqueue_active(&clt->permits_wait)) in rtrs_clt_put_permit()
158 wake_up(&clt->permits_wait); in rtrs_clt_put_permit()
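Together, rtrs_clt_get_permit() and rtrs_clt_put_permit() bracket every I/O: the permit reserves one slot of the negotiated queue depth and doubles as the index of the server-side buffer. A hedged caller-side sketch (RTRS_IO_CON and the RTRS_PERMIT_WAIT/RTRS_PERMIT_NOWAIT flags are assumed to come from the public rtrs.h header):

/* Sketch: reserve a queue slot for one I/O, release it when the I/O is done. */
static int submit_one_io(struct rtrs_clt *clt, bool can_sleep)
{
	struct rtrs_permit *permit;

	permit = rtrs_clt_get_permit(clt, RTRS_IO_CON,
				     can_sleep ? RTRS_PERMIT_WAIT
					       : RTRS_PERMIT_NOWAIT);
	if (!permit)
		return -EBUSY;	/* queue full and sleeping not allowed */

	/* ... build and post the request that uses this permit ... */

	rtrs_clt_put_permit(clt, permit);
	return 0;
}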
162 void *rtrs_permit_to_pdu(struct rtrs_permit *permit) in rtrs_permit_to_pdu() argument
164 return permit + 1; in rtrs_permit_to_pdu()
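rtrs_permit_to_pdu() works because each permit is allocated with pdu_sz extra bytes right behind struct rtrs_permit (see get_permit()/permit_size() in alloc_permits() and the @pdu_sz argument of rtrs_clt_open()). A sketch of how a caller typically keeps its per-request state there; struct my_iu is a hypothetical example:

/* Sketch: caller-private data lives immediately after the permit. */
struct my_iu {			/* hypothetical per-request unit */
	struct request	*rq;
	int		status;
};

static struct my_iu *permit_to_iu(struct rtrs_permit *permit)
{
	/* valid as long as pdu_sz >= sizeof(struct my_iu) was passed at open time */
	return rtrs_permit_to_pdu(permit);
}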
169 * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit
171 * @permit: permit for the allocation of the RDMA buffer
178 struct rtrs_permit *permit) in rtrs_permit_to_clt_con() argument
182 if (likely(permit->con_type == RTRS_IO_CON)) in rtrs_permit_to_clt_con()
183 id = (permit->cpu_id % (sess->s.con_num - 1)) + 1; in rtrs_permit_to_clt_con()
185 return to_clt_con(sess->s.con[id]); in rtrs_permit_to_clt_con()
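In other words, connection 0 is reserved for admin messages and I/O permits are spread over the remaining connections by the CPU that allocated them: with, say, s.con_num = 5, a permit taken on CPU 7 maps to id = (7 % 4) + 1 = 4, so different CPUs tend to land on different connections.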
189 * __rtrs_clt_change_state() - change the session state through session state
206 lockdep_assert_held(&sess->state_wq.lock); in __rtrs_clt_change_state()
208 old_state = sess->state; in __rtrs_clt_change_state()
282 sess->state = new_state; in __rtrs_clt_change_state()
283 wake_up_locked(&sess->state_wq); in __rtrs_clt_change_state()
295 spin_lock_irq(&sess->state_wq.lock); in rtrs_clt_change_state_from_to()
296 if (sess->state == old_state) in rtrs_clt_change_state_from_to()
298 spin_unlock_irq(&sess->state_wq.lock); in rtrs_clt_change_state_from_to()
305 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_rdma_error_recovery()
310 struct rtrs_clt *clt = sess->clt; in rtrs_rdma_error_recovery()
316 delay_ms = clt->reconnect_delay_sec * 1000; in rtrs_rdma_error_recovery()
317 queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, in rtrs_rdma_error_recovery()
334 struct rtrs_clt_con *con = cq->cq_context; in rtrs_clt_fast_reg_done()
336 if (unlikely(wc->status != IB_WC_SUCCESS)) { in rtrs_clt_fast_reg_done()
337 rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n", in rtrs_clt_fast_reg_done()
338 ib_wc_status_msg(wc->status)); in rtrs_clt_fast_reg_done()
353 container_of(wc->wr_cqe, typeof(*req), inv_cqe); in rtrs_clt_inv_rkey_done()
354 struct rtrs_clt_con *con = cq->cq_context; in rtrs_clt_inv_rkey_done()
356 if (unlikely(wc->status != IB_WC_SUCCESS)) { in rtrs_clt_inv_rkey_done()
357 rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n", in rtrs_clt_inv_rkey_done()
358 ib_wc_status_msg(wc->status)); in rtrs_clt_inv_rkey_done()
361 req->need_inv = false; in rtrs_clt_inv_rkey_done()
362 if (likely(req->need_inv_comp)) in rtrs_clt_inv_rkey_done()
363 complete(&req->inv_comp); in rtrs_clt_inv_rkey_done()
366 complete_rdma_req(req, req->inv_errno, true, false); in rtrs_clt_inv_rkey_done()
371 struct rtrs_clt_con *con = req->con; in rtrs_inv_rkey()
374 .wr_cqe = &req->inv_cqe, in rtrs_inv_rkey()
376 .ex.invalidate_rkey = req->mr->rkey, in rtrs_inv_rkey()
378 req->inv_cqe.done = rtrs_clt_inv_rkey_done; in rtrs_inv_rkey()
380 return ib_post_send(con->c.qp, &wr, NULL); in rtrs_inv_rkey()
386 struct rtrs_clt_con *con = req->con; in complete_rdma_req()
390 if (!req->in_use) in complete_rdma_req()
392 if (WARN_ON(!req->con)) in complete_rdma_req()
394 sess = to_clt_sess(con->c.sess); in complete_rdma_req()
396 if (req->sg_cnt) { in complete_rdma_req()
397 if (unlikely(req->dir == DMA_FROM_DEVICE && req->need_inv)) { in complete_rdma_req()
413 req->need_inv_comp = true; in complete_rdma_req()
418 req->inv_errno = errno; in complete_rdma_req()
423 rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n", in complete_rdma_req()
424 req->mr->rkey, err); in complete_rdma_req()
426 wait_for_completion(&req->inv_comp); in complete_rdma_req()
437 ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, in complete_rdma_req()
438 req->sg_cnt, req->dir); in complete_rdma_req()
440 if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) in complete_rdma_req()
441 atomic_dec(&sess->stats->inflight); in complete_rdma_req()
443 req->in_use = false; in complete_rdma_req()
444 req->con = NULL; in complete_rdma_req()
447 req->conf(req->priv, errno); in complete_rdma_req()
455 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_post_send_rdma()
459 if (unlikely(!req->sg_size)) { in rtrs_post_send_rdma()
460 rtrs_wrn(con->c.sess, in rtrs_post_send_rdma()
462 return -EINVAL; in rtrs_post_send_rdma()
466 sge.addr = req->iu->dma_addr; in rtrs_post_send_rdma()
467 sge.length = req->sg_size; in rtrs_post_send_rdma()
468 sge.lkey = sess->s.dev->ib_pd->local_dma_lkey; in rtrs_post_send_rdma()
474 flags = atomic_inc_return(&con->io_cnt) % sess->queue_depth ? in rtrs_post_send_rdma()
477 ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, in rtrs_post_send_rdma()
478 req->sg_size, DMA_TO_DEVICE); in rtrs_post_send_rdma()
480 return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1, in rtrs_post_send_rdma()
481 rbuf->rkey, rbuf->addr + off, in rtrs_post_send_rdma()
490 if (WARN_ON(msg_id >= sess->queue_depth)) in process_io_rsp()
493 req = &sess->reqs[msg_id]; in process_io_rsp()
495 req->need_inv &= !w_inval; in process_io_rsp()
503 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_clt_recv_done()
505 WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); in rtrs_clt_recv_done()
506 iu = container_of(wc->wr_cqe, struct rtrs_iu, in rtrs_clt_recv_done()
508 err = rtrs_iu_post_recv(&con->c, iu); in rtrs_clt_recv_done()
510 rtrs_err(con->c.sess, "post iu failed %d\n", err); in rtrs_clt_recv_done()
517 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_clt_rkey_rsp_done()
525 WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); in rtrs_clt_rkey_rsp_done()
527 iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); in rtrs_clt_rkey_rsp_done()
529 if (unlikely(wc->byte_len < sizeof(*msg))) { in rtrs_clt_rkey_rsp_done()
530 rtrs_err(con->c.sess, "rkey response is malformed: size %d\n", in rtrs_clt_rkey_rsp_done()
531 wc->byte_len); in rtrs_clt_rkey_rsp_done()
534 ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, in rtrs_clt_rkey_rsp_done()
535 iu->size, DMA_FROM_DEVICE); in rtrs_clt_rkey_rsp_done()
536 msg = iu->buf; in rtrs_clt_rkey_rsp_done()
537 if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP)) { in rtrs_clt_rkey_rsp_done()
538 rtrs_err(sess->clt, "rkey response is malformed: type %d\n", in rtrs_clt_rkey_rsp_done()
539 le16_to_cpu(msg->type)); in rtrs_clt_rkey_rsp_done()
542 buf_id = le16_to_cpu(msg->buf_id); in rtrs_clt_rkey_rsp_done()
543 if (WARN_ON(buf_id >= sess->queue_depth)) in rtrs_clt_rkey_rsp_done()
546 rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); in rtrs_clt_rkey_rsp_done()
556 sess->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey); in rtrs_clt_rkey_rsp_done()
559 ib_dma_sync_single_for_device(sess->s.dev->ib_dev, iu->dma_addr, in rtrs_clt_rkey_rsp_done()
560 iu->size, DMA_FROM_DEVICE); in rtrs_clt_rkey_rsp_done()
584 wr->wr_cqe = cqe; in rtrs_post_recv_empty_x2()
587 wr->next = &wr_arr[i - 1]; in rtrs_post_recv_empty_x2()
590 return ib_post_recv(con->qp, wr, NULL); in rtrs_post_recv_empty_x2()
595 struct rtrs_clt_con *con = cq->cq_context; in rtrs_clt_rdma_done()
596 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_clt_rdma_done()
601 if (unlikely(wc->status != IB_WC_SUCCESS)) { in rtrs_clt_rdma_done()
602 if (wc->status != IB_WC_WR_FLUSH_ERR) { in rtrs_clt_rdma_done()
603 rtrs_err(sess->clt, "RDMA failed: %s\n", in rtrs_clt_rdma_done()
604 ib_wc_status_msg(wc->status)); in rtrs_clt_rdma_done()
611 switch (wc->opcode) { in rtrs_clt_rdma_done()
617 if (WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done)) in rtrs_clt_rdma_done()
619 rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), in rtrs_clt_rdma_done()
630 WARN_ON(con->c.cid); in rtrs_clt_rdma_done()
631 rtrs_send_hb_ack(&sess->s); in rtrs_clt_rdma_done()
632 if (sess->flags & RTRS_MSG_NEW_RKEY_F) in rtrs_clt_rdma_done()
635 WARN_ON(con->c.cid); in rtrs_clt_rdma_done()
636 sess->s.hb_missed_cnt = 0; in rtrs_clt_rdma_done()
637 if (sess->flags & RTRS_MSG_NEW_RKEY_F) in rtrs_clt_rdma_done()
640 rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", in rtrs_clt_rdma_done()
648 err = rtrs_post_recv_empty_x2(&con->c, &io_comp_cqe); in rtrs_clt_rdma_done()
650 err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); in rtrs_clt_rdma_done()
652 rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n", in rtrs_clt_rdma_done()
662 WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || in rtrs_clt_rdma_done()
663 wc->wc_flags & IB_WC_WITH_IMM)); in rtrs_clt_rdma_done()
664 WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); in rtrs_clt_rdma_done()
665 if (sess->flags & RTRS_MSG_NEW_RKEY_F) { in rtrs_clt_rdma_done()
666 if (wc->wc_flags & IB_WC_WITH_INVALIDATE) in rtrs_clt_rdma_done()
679 rtrs_wrn(sess->clt, "Unexpected WC type: %d\n", wc->opcode); in rtrs_clt_rdma_done()
687 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in post_recv_io()
690 if (sess->flags & RTRS_MSG_NEW_RKEY_F) { in post_recv_io()
691 struct rtrs_iu *iu = &con->rsp_ius[i]; in post_recv_io()
693 err = rtrs_iu_post_recv(&con->c, iu); in post_recv_io()
695 err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); in post_recv_io()
709 for (cid = 0; cid < sess->s.con_num; cid++) { in post_recv_sess()
713 q_size = sess->queue_depth; in post_recv_sess()
721 err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size); in post_recv_sess()
723 rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err); in post_recv_sess()
739 * list_next_or_null_rr_rcu - get next list element in round-robin fashion.
745 * Next element returned in round-robin fashion, i.e. head will be skipped,
748 * This primitive may safely run concurrently with the _rcu list-mutation
754 list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \
759 * get_next_path_rr() - Returns path in round-robin fashion.
773 clt = it->clt; in get_next_path_rr()
781 ppcpu_path = this_cpu_ptr(clt->pcpu_path); in get_next_path_rr()
784 path = list_first_or_null_rcu(&clt->paths_list, in get_next_path_rr()
787 path = list_next_or_null_rr_rcu(&clt->paths_list, in get_next_path_rr()
788 &path->s.entry, in get_next_path_rr()
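A hedged sketch of how list_first_or_null_rcu() and list_next_or_null_rr_rcu() combine into the round-robin walk used by get_next_path_rr(); the caller must hold rcu_read_lock() and may only dereference the result while it is held:

/* Sketch: pick the path after 'cur' in round-robin order, wrapping past the head. */
static struct rtrs_clt_sess *pick_next_path(struct rtrs_clt *clt,
					    struct rtrs_clt_sess *cur)
{
	if (!cur)	/* first use on this CPU: start from the list head */
		return list_first_or_null_rcu(&clt->paths_list,
					      struct rtrs_clt_sess, s.entry);

	return list_next_or_null_rr_rcu(&clt->paths_list, &cur->s.entry,
					struct rtrs_clt_sess, s.entry);
}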
797 * get_next_path_min_inflight() - Returns path with minimal inflight count.
808 struct rtrs_clt *clt = it->clt; in get_next_path_min_inflight()
813 list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { in get_next_path_min_inflight()
814 if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) in get_next_path_min_inflight()
817 if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))) in get_next_path_min_inflight()
820 inflight = atomic_read(&sess->stats->inflight); in get_next_path_min_inflight()
833 list_add(raw_cpu_ptr(min_path->mp_skip_entry), &it->skip_list); in get_next_path_min_inflight()
840 INIT_LIST_HEAD(&it->skip_list); in path_it_init()
841 it->clt = clt; in path_it_init()
842 it->i = 0; in path_it_init()
844 if (clt->mp_policy == MP_POLICY_RR) in path_it_init()
845 it->next_path = get_next_path_rr; in path_it_init()
847 it->next_path = get_next_path_min_inflight; in path_it_init()
856 * paths (->mp_skip_entry) into a skip_list again. in path_it_deinit()
858 list_for_each_safe(skip, tmp, &it->skip_list) in path_it_deinit()
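Both rtrs_clt_request() and rtrs_clt_failover_req() consume the iterator the same way: init it, ask ->next_path() at most paths_num times under rcu_read_lock(), then deinit so that paths parked on the skip list (min-inflight policy) are re-armed. A condensed sketch of that loop:

/* Sketch: try each path at most once, in multipath-policy order. */
static int try_each_path(struct rtrs_clt *clt)
{
	struct rtrs_clt_sess *sess;
	struct path_it it;
	int err = -ECONNABORTED;

	rcu_read_lock();
	for (path_it_init(&it, clt);
	     (sess = it.next_path(&it)) && it.i < it.clt->paths_num;
	     it.i++) {
		if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
			continue;
		/* ... issue the request on this path, break on success ... */
		err = 0;
		break;
	}
	path_it_deinit(&it);
	rcu_read_unlock();

	return err;
}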
866 * the corresponding buffer of rtrs_iu (req->iu->buf), which later on will
871 * @permit: permit for allocation of RDMA remote buffer
883 struct rtrs_permit *permit, void *priv, in rtrs_clt_init_req() argument
891 req->permit = permit; in rtrs_clt_init_req()
892 req->in_use = true; in rtrs_clt_init_req()
893 req->usr_len = usr_len; in rtrs_clt_init_req()
894 req->data_len = data_len; in rtrs_clt_init_req()
895 req->sglist = sg; in rtrs_clt_init_req()
896 req->sg_cnt = sg_cnt; in rtrs_clt_init_req()
897 req->priv = priv; in rtrs_clt_init_req()
898 req->dir = dir; in rtrs_clt_init_req()
899 req->con = rtrs_permit_to_clt_con(sess, permit); in rtrs_clt_init_req()
900 req->conf = conf; in rtrs_clt_init_req()
901 req->need_inv = false; in rtrs_clt_init_req()
902 req->need_inv_comp = false; in rtrs_clt_init_req()
903 req->inv_errno = 0; in rtrs_clt_init_req()
906 len = _copy_from_iter(req->iu->buf, usr_len, &iter); in rtrs_clt_init_req()
909 reinit_completion(&req->inv_comp); in rtrs_clt_init_req()
915 struct rtrs_permit *permit, void *priv, in rtrs_clt_get_req() argument
922 req = &sess->reqs[permit->mem_id]; in rtrs_clt_get_req()
923 rtrs_clt_init_req(req, sess, conf, permit, priv, vec, usr_len, in rtrs_clt_get_req()
934 .iov_base = fail_req->iu->buf, in rtrs_clt_get_copy_req()
935 .iov_len = fail_req->usr_len in rtrs_clt_get_copy_req()
938 req = &alive_sess->reqs[fail_req->permit->mem_id]; in rtrs_clt_get_copy_req()
939 rtrs_clt_init_req(req, alive_sess, fail_req->conf, fail_req->permit, in rtrs_clt_get_copy_req()
940 fail_req->priv, &vec, fail_req->usr_len, in rtrs_clt_get_copy_req()
941 fail_req->sglist, fail_req->sg_cnt, in rtrs_clt_get_copy_req()
942 fail_req->data_len, fail_req->dir); in rtrs_clt_get_copy_req()
951 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_post_rdma_write_sg()
952 struct ib_sge *sge = req->sge; in rtrs_post_rdma_write_sg()
958 for_each_sg(req->sglist, sg, req->sg_cnt, i) { in rtrs_post_rdma_write_sg()
961 sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; in rtrs_post_rdma_write_sg()
963 sge[i].addr = req->iu->dma_addr; in rtrs_post_rdma_write_sg()
965 sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; in rtrs_post_rdma_write_sg()
967 num_sge = 1 + req->sg_cnt; in rtrs_post_rdma_write_sg()
973 flags = atomic_inc_return(&con->io_cnt) % sess->queue_depth ? in rtrs_post_rdma_write_sg()
976 ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, in rtrs_post_rdma_write_sg()
979 return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge, in rtrs_post_rdma_write_sg()
980 rbuf->rkey, rbuf->addr, imm, in rtrs_post_rdma_write_sg()
986 struct rtrs_clt_con *con = req->con; in rtrs_clt_write_req()
987 struct rtrs_sess *s = con->c.sess; in rtrs_clt_write_req()
995 const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; in rtrs_clt_write_req()
997 if (unlikely(tsize > sess->chunk_size)) { in rtrs_clt_write_req()
999 tsize, sess->chunk_size); in rtrs_clt_write_req()
1000 return -EMSGSIZE; in rtrs_clt_write_req()
1002 if (req->sg_cnt) { in rtrs_clt_write_req()
1003 count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist, in rtrs_clt_write_req()
1004 req->sg_cnt, req->dir); in rtrs_clt_write_req()
1007 return -EINVAL; in rtrs_clt_write_req()
1011 msg = req->iu->buf + req->usr_len; in rtrs_clt_write_req()
1012 msg->type = cpu_to_le16(RTRS_MSG_WRITE); in rtrs_clt_write_req()
1013 msg->usr_len = cpu_to_le16(req->usr_len); in rtrs_clt_write_req()
1016 imm = req->permit->mem_off + req->data_len + req->usr_len; in rtrs_clt_write_req()
1018 buf_id = req->permit->mem_id; in rtrs_clt_write_req()
1019 req->sg_size = tsize; in rtrs_clt_write_req()
1020 rbuf = &sess->rbufs[buf_id]; in rtrs_clt_write_req()
1028 ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, in rtrs_clt_write_req()
1029 req->usr_len + sizeof(*msg), in rtrs_clt_write_req()
1033 if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) in rtrs_clt_write_req()
1034 atomic_dec(&sess->stats->inflight); in rtrs_clt_write_req()
1035 if (req->sg_cnt) in rtrs_clt_write_req()
1036 ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, in rtrs_clt_write_req()
1037 req->sg_cnt, req->dir); in rtrs_clt_write_req()
1048 nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); in rtrs_map_sg_fr()
1051 if (unlikely(nr < req->sg_cnt)) in rtrs_map_sg_fr()
1052 return -EINVAL; in rtrs_map_sg_fr()
1053 ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); in rtrs_map_sg_fr()
1060 struct rtrs_clt_con *con = req->con; in rtrs_clt_read_req()
1061 struct rtrs_sess *s = con->c.sess; in rtrs_clt_read_req()
1072 const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; in rtrs_clt_read_req()
1074 s = &sess->s; in rtrs_clt_read_req()
1075 dev = sess->s.dev; in rtrs_clt_read_req()
1077 if (unlikely(tsize > sess->chunk_size)) { in rtrs_clt_read_req()
1080 tsize, sess->chunk_size); in rtrs_clt_read_req()
1081 return -EMSGSIZE; in rtrs_clt_read_req()
1084 if (req->sg_cnt) { in rtrs_clt_read_req()
1085 count = ib_dma_map_sg(dev->ib_dev, req->sglist, req->sg_cnt, in rtrs_clt_read_req()
1086 req->dir); in rtrs_clt_read_req()
1090 return -EINVAL; in rtrs_clt_read_req()
1093 /* put our message into req->buf after user message */ in rtrs_clt_read_req()
1094 msg = req->iu->buf + req->usr_len; in rtrs_clt_read_req()
1095 msg->type = cpu_to_le16(RTRS_MSG_READ); in rtrs_clt_read_req()
1096 msg->usr_len = cpu_to_le16(req->usr_len); in rtrs_clt_read_req()
1104 ib_dma_unmap_sg(dev->ib_dev, req->sglist, req->sg_cnt, in rtrs_clt_read_req()
1105 req->dir); in rtrs_clt_read_req()
1111 .mr = req->mr, in rtrs_clt_read_req()
1112 .key = req->mr->rkey, in rtrs_clt_read_req()
1118 msg->sg_cnt = cpu_to_le16(1); in rtrs_clt_read_req()
1119 msg->flags = cpu_to_le16(RTRS_MSG_NEED_INVAL_F); in rtrs_clt_read_req()
1121 msg->desc[0].addr = cpu_to_le64(req->mr->iova); in rtrs_clt_read_req()
1122 msg->desc[0].key = cpu_to_le32(req->mr->rkey); in rtrs_clt_read_req()
1123 msg->desc[0].len = cpu_to_le32(req->mr->length); in rtrs_clt_read_req()
1126 req->need_inv = !!RTRS_MSG_NEED_INVAL_F; in rtrs_clt_read_req()
1129 msg->sg_cnt = 0; in rtrs_clt_read_req()
1130 msg->flags = 0; in rtrs_clt_read_req()
1136 imm = req->permit->mem_off + req->data_len + req->usr_len; in rtrs_clt_read_req()
1138 buf_id = req->permit->mem_id; in rtrs_clt_read_req()
1140 req->sg_size = sizeof(*msg); in rtrs_clt_read_req()
1141 req->sg_size += le16_to_cpu(msg->sg_cnt) * sizeof(struct rtrs_sg_desc); in rtrs_clt_read_req()
1142 req->sg_size += req->usr_len; in rtrs_clt_read_req()
1150 ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id], in rtrs_clt_read_req()
1151 req->data_len, imm, wr); in rtrs_clt_read_req()
1154 if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) in rtrs_clt_read_req()
1155 atomic_dec(&sess->stats->inflight); in rtrs_clt_read_req()
1156 req->need_inv = false; in rtrs_clt_read_req()
1157 if (req->sg_cnt) in rtrs_clt_read_req()
1158 ib_dma_unmap_sg(dev->ib_dev, req->sglist, in rtrs_clt_read_req()
1159 req->sg_cnt, req->dir); in rtrs_clt_read_req()
1175 int err = -ECONNABORTED; in rtrs_clt_failover_req()
1180 (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num; in rtrs_clt_failover_req()
1182 if (unlikely(READ_ONCE(alive_sess->state) != in rtrs_clt_failover_req()
1186 if (req->dir == DMA_TO_DEVICE) in rtrs_clt_failover_req()
1191 req->in_use = false; in rtrs_clt_failover_req()
1195 rtrs_clt_inc_failover_cnt(alive_sess->stats); in rtrs_clt_failover_req()
1206 struct rtrs_clt *clt = sess->clt; in fail_all_outstanding_reqs()
1210 if (!sess->reqs) in fail_all_outstanding_reqs()
1212 for (i = 0; i < sess->queue_depth; ++i) { in fail_all_outstanding_reqs()
1213 req = &sess->reqs[i]; in fail_all_outstanding_reqs()
1214 if (!req->in_use) in fail_all_outstanding_reqs()
1222 complete_rdma_req(req, -ECONNABORTED, false, true); in fail_all_outstanding_reqs()
1227 req->conf(req->priv, err); in fail_all_outstanding_reqs()
1236 if (!sess->reqs) in free_sess_reqs()
1238 for (i = 0; i < sess->queue_depth; ++i) { in free_sess_reqs()
1239 req = &sess->reqs[i]; in free_sess_reqs()
1240 if (req->mr) in free_sess_reqs()
1241 ib_dereg_mr(req->mr); in free_sess_reqs()
1242 kfree(req->sge); in free_sess_reqs()
1243 rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); in free_sess_reqs()
1245 kfree(sess->reqs); in free_sess_reqs()
1246 sess->reqs = NULL; in free_sess_reqs()
1252 struct rtrs_clt *clt = sess->clt; in alloc_sess_reqs()
1253 int i, err = -ENOMEM; in alloc_sess_reqs()
1255 sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs), in alloc_sess_reqs()
1257 if (!sess->reqs) in alloc_sess_reqs()
1258 return -ENOMEM; in alloc_sess_reqs()
1260 for (i = 0; i < sess->queue_depth; ++i) { in alloc_sess_reqs()
1261 req = &sess->reqs[i]; in alloc_sess_reqs()
1262 req->iu = rtrs_iu_alloc(1, sess->max_hdr_size, GFP_KERNEL, in alloc_sess_reqs()
1263 sess->s.dev->ib_dev, in alloc_sess_reqs()
1266 if (!req->iu) in alloc_sess_reqs()
1269 req->sge = kmalloc_array(clt->max_segments + 1, in alloc_sess_reqs()
1270 sizeof(*req->sge), GFP_KERNEL); in alloc_sess_reqs()
1271 if (!req->sge) in alloc_sess_reqs()
1274 req->mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, in alloc_sess_reqs()
1275 sess->max_pages_per_mr); in alloc_sess_reqs()
1276 if (IS_ERR(req->mr)) { in alloc_sess_reqs()
1277 err = PTR_ERR(req->mr); in alloc_sess_reqs()
1278 req->mr = NULL; in alloc_sess_reqs()
1279 pr_err("Failed to alloc sess->max_pages_per_mr %d\n", in alloc_sess_reqs()
1280 sess->max_pages_per_mr); in alloc_sess_reqs()
1284 init_completion(&req->inv_comp); in alloc_sess_reqs()
1300 clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth), in alloc_permits()
1302 if (!clt->permits_map) { in alloc_permits()
1303 err = -ENOMEM; in alloc_permits()
1306 clt->permits = kcalloc(clt->queue_depth, permit_size(clt), GFP_KERNEL); in alloc_permits()
1307 if (!clt->permits) { in alloc_permits()
1308 err = -ENOMEM; in alloc_permits()
1311 chunk_bits = ilog2(clt->queue_depth - 1) + 1; in alloc_permits()
1312 for (i = 0; i < clt->queue_depth; i++) { in alloc_permits()
1313 struct rtrs_permit *permit; in alloc_permits() local
1315 permit = get_permit(clt, i); in alloc_permits()
1316 permit->mem_id = i; in alloc_permits()
1317 permit->mem_off = i << (MAX_IMM_PAYL_BITS - chunk_bits); in alloc_permits()
1323 kfree(clt->permits_map); in alloc_permits()
1324 clt->permits_map = NULL; in alloc_permits()
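The mem_off value computed above encodes the permit in the RDMA immediate data: chunk_bits is the number of bits needed to represent queue_depth distinct buffer ids, so mem_id occupies the top chunk_bits bits of the immediate payload and the low bits are left for the in-chunk byte count. As a worked example (assuming MAX_IMM_PAYL_BITS = 28, its definition in rtrs-pri.h): with queue_depth = 512, chunk_bits = ilog2(511) + 1 = 9 and permit i gets mem_off = i << 19; the write/read paths later form imm = mem_off + data_len + usr_len, from which the server can recover both the buffer id and the transferred length.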
1331 if (clt->permits_map) { in free_permits()
1332 size_t sz = clt->queue_depth; in free_permits()
1334 wait_event(clt->permits_wait, in free_permits()
1335 find_first_bit(clt->permits_map, sz) >= sz); in free_permits()
1337 kfree(clt->permits_map); in free_permits()
1338 clt->permits_map = NULL; in free_permits()
1339 kfree(clt->permits); in free_permits()
1340 clt->permits = NULL; in free_permits()
1349 ib_dev = sess->s.dev->ib_dev; in query_fast_reg_mode()
1356 mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1); in query_fast_reg_mode()
1357 max_pages_per_mr = ib_dev->attrs.max_mr_size; in query_fast_reg_mode()
1359 sess->max_pages_per_mr = in query_fast_reg_mode()
1360 min3(sess->max_pages_per_mr, (u32)max_pages_per_mr, in query_fast_reg_mode()
1361 ib_dev->attrs.max_fast_reg_page_list_len); in query_fast_reg_mode()
1362 sess->max_send_sge = ib_dev->attrs.max_send_sge; in query_fast_reg_mode()
1371 spin_lock_irq(&sess->state_wq.lock); in rtrs_clt_change_state_get_old()
1372 *old_state = sess->state; in rtrs_clt_change_state_get_old()
1374 spin_unlock_irq(&sess->state_wq.lock); in rtrs_clt_change_state_get_old()
1396 rtrs_init_hb(&sess->s, &io_comp_cqe, in rtrs_clt_init_hb()
1405 rtrs_start_hb(&sess->s); in rtrs_clt_start_hb()
1410 rtrs_stop_hb(&sess->s); in rtrs_clt_stop_hb()
1422 int err = -ENOMEM; in alloc_sess()
1432 sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL); in alloc_sess()
1433 if (!sess->s.con) in alloc_sess()
1436 sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); in alloc_sess()
1437 if (!sess->stats) in alloc_sess()
1440 mutex_init(&sess->init_mutex); in alloc_sess()
1441 uuid_gen(&sess->s.uuid); in alloc_sess()
1442 memcpy(&sess->s.dst_addr, path->dst, in alloc_sess()
1443 rdma_addr_size((struct sockaddr *)path->dst)); in alloc_sess()
1447 * checks the sa_family to be non-zero. If user passed src_addr=NULL in alloc_sess()
1448 * the sess->src_addr will contain only zeros, which is then fine. in alloc_sess()
1450 if (path->src) in alloc_sess()
1451 memcpy(&sess->s.src_addr, path->src, in alloc_sess()
1452 rdma_addr_size((struct sockaddr *)path->src)); in alloc_sess()
1453 strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); in alloc_sess()
1454 sess->s.con_num = con_num; in alloc_sess()
1455 sess->clt = clt; in alloc_sess()
1456 sess->max_pages_per_mr = max_segments * max_segment_size >> 12; in alloc_sess()
1457 init_waitqueue_head(&sess->state_wq); in alloc_sess()
1458 sess->state = RTRS_CLT_CONNECTING; in alloc_sess()
1459 atomic_set(&sess->connected_cnt, 0); in alloc_sess()
1460 INIT_WORK(&sess->close_work, rtrs_clt_close_work); in alloc_sess()
1461 INIT_DELAYED_WORK(&sess->reconnect_dwork, rtrs_clt_reconnect_work); in alloc_sess()
1464 sess->mp_skip_entry = alloc_percpu(typeof(*sess->mp_skip_entry)); in alloc_sess()
1465 if (!sess->mp_skip_entry) in alloc_sess()
1469 INIT_LIST_HEAD(per_cpu_ptr(sess->mp_skip_entry, cpu)); in alloc_sess()
1471 err = rtrs_clt_init_stats(sess->stats); in alloc_sess()
1478 free_percpu(sess->mp_skip_entry); in alloc_sess()
1480 kfree(sess->stats); in alloc_sess()
1482 kfree(sess->s.con); in alloc_sess()
1491 free_percpu(sess->mp_skip_entry); in free_sess()
1492 mutex_destroy(&sess->init_mutex); in free_sess()
1493 kfree(sess->s.con); in free_sess()
1494 kfree(sess->rbufs); in free_sess()
1504 return -ENOMEM; in create_con()
1507 con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids; in create_con()
1508 con->c.cid = cid; in create_con()
1509 con->c.sess = &sess->s; in create_con()
1510 atomic_set(&con->io_cnt, 0); in create_con()
1512 sess->s.con[cid] = &con->c; in create_con()
1519 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in destroy_con()
1521 sess->s.con[con->c.cid] = NULL; in destroy_con()
1527 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in create_con_cq_qp()
1541 if (con->c.cid == 0) { in create_con_cq_qp()
1551 if (WARN_ON(sess->s.dev)) in create_con_cq_qp()
1552 return -EINVAL; in create_con_cq_qp()
1559 sess->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device, in create_con_cq_qp()
1561 if (!sess->s.dev) { in create_con_cq_qp()
1562 rtrs_wrn(sess->clt, in create_con_cq_qp()
1564 return -ENOMEM; in create_con_cq_qp()
1566 sess->s.dev_ref = 1; in create_con_cq_qp()
1574 if (WARN_ON(!sess->s.dev)) in create_con_cq_qp()
1575 return -EINVAL; in create_con_cq_qp()
1576 if (WARN_ON(!sess->queue_depth)) in create_con_cq_qp()
1577 return -EINVAL; in create_con_cq_qp()
1580 sess->s.dev_ref++; in create_con_cq_qp()
1582 min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, in create_con_cq_qp()
1584 sess->queue_depth * 3 + 1); in create_con_cq_qp()
1586 min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, in create_con_cq_qp()
1587 sess->queue_depth * 3 + 1); in create_con_cq_qp()
1590 if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { in create_con_cq_qp()
1591 con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), in create_con_cq_qp()
1592 GFP_KERNEL, sess->s.dev->ib_dev, in create_con_cq_qp()
1595 if (!con->rsp_ius) in create_con_cq_qp()
1596 return -ENOMEM; in create_con_cq_qp()
1597 con->queue_size = max_recv_wr; in create_con_cq_qp()
1600 cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; in create_con_cq_qp()
1601 err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, in create_con_cq_qp()
1613 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in destroy_con_cq_qp()
1620 rtrs_cq_qp_destroy(&con->c); in destroy_con_cq_qp()
1621 if (con->rsp_ius) { in destroy_con_cq_qp()
1622 rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); in destroy_con_cq_qp()
1623 con->rsp_ius = NULL; in destroy_con_cq_qp()
1624 con->queue_size = 0; in destroy_con_cq_qp()
1626 if (sess->s.dev_ref && !--sess->s.dev_ref) { in destroy_con_cq_qp()
1627 rtrs_ib_dev_put(sess->s.dev); in destroy_con_cq_qp()
1628 sess->s.dev = NULL; in destroy_con_cq_qp()
1634 rdma_disconnect(con->c.cm_id); in stop_cm()
1635 if (con->c.qp) in stop_cm()
1636 ib_drain_qp(con->c.qp); in stop_cm()
1641 rdma_destroy_id(con->c.cm_id); in destroy_cm()
1642 con->c.cm_id = NULL; in destroy_cm()
1647 struct rtrs_sess *s = con->c.sess; in rtrs_rdma_addr_resolved()
1655 err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); in rtrs_rdma_addr_resolved()
1664 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_rdma_route_resolved()
1665 struct rtrs_clt *clt = sess->clt; in rtrs_rdma_route_resolved()
1681 .cid = cpu_to_le16(con->c.cid), in rtrs_rdma_route_resolved()
1682 .cid_num = cpu_to_le16(sess->s.con_num), in rtrs_rdma_route_resolved()
1683 .recon_cnt = cpu_to_le16(sess->s.recon_cnt), in rtrs_rdma_route_resolved()
1685 msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0; in rtrs_rdma_route_resolved()
1686 uuid_copy(&msg.sess_uuid, &sess->s.uuid); in rtrs_rdma_route_resolved()
1687 uuid_copy(&msg.paths_uuid, &clt->paths_uuid); in rtrs_rdma_route_resolved()
1689 err = rdma_connect_locked(con->c.cm_id, &param); in rtrs_rdma_route_resolved()
1699 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_rdma_conn_established()
1700 struct rtrs_clt *clt = sess->clt; in rtrs_rdma_conn_established()
1706 msg = ev->param.conn.private_data; in rtrs_rdma_conn_established()
1707 len = ev->param.conn.private_data_len; in rtrs_rdma_conn_established()
1710 return -ECONNRESET; in rtrs_rdma_conn_established()
1712 if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { in rtrs_rdma_conn_established()
1714 return -ECONNRESET; in rtrs_rdma_conn_established()
1716 version = le16_to_cpu(msg->version); in rtrs_rdma_conn_established()
1720 return -ECONNRESET; in rtrs_rdma_conn_established()
1722 errno = le16_to_cpu(msg->errno); in rtrs_rdma_conn_established()
1726 return -ECONNRESET; in rtrs_rdma_conn_established()
1728 if (con->c.cid == 0) { in rtrs_rdma_conn_established()
1729 queue_depth = le16_to_cpu(msg->queue_depth); in rtrs_rdma_conn_established()
1731 if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) { in rtrs_rdma_conn_established()
1737 sess->reconnect_attempts = -1; in rtrs_rdma_conn_established()
1739 "Disabling auto-reconnect. Trigger a manual reconnect after issue is resolved\n"); in rtrs_rdma_conn_established()
1740 return -ECONNRESET; in rtrs_rdma_conn_established()
1743 if (!sess->rbufs) { in rtrs_rdma_conn_established()
1744 kfree(sess->rbufs); in rtrs_rdma_conn_established()
1745 sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs), in rtrs_rdma_conn_established()
1747 if (!sess->rbufs) in rtrs_rdma_conn_established()
1748 return -ENOMEM; in rtrs_rdma_conn_established()
1750 sess->queue_depth = queue_depth; in rtrs_rdma_conn_established()
1751 sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size); in rtrs_rdma_conn_established()
1752 sess->max_io_size = le32_to_cpu(msg->max_io_size); in rtrs_rdma_conn_established()
1753 sess->flags = le32_to_cpu(msg->flags); in rtrs_rdma_conn_established()
1754 sess->chunk_size = sess->max_io_size + sess->max_hdr_size; in rtrs_rdma_conn_established()
1759 * higher - client does not care and uses cached minimum. in rtrs_rdma_conn_established()
1764 mutex_lock(&clt->paths_mutex); in rtrs_rdma_conn_established()
1765 clt->queue_depth = sess->queue_depth; in rtrs_rdma_conn_established()
1766 clt->max_io_size = min_not_zero(sess->max_io_size, in rtrs_rdma_conn_established()
1767 clt->max_io_size); in rtrs_rdma_conn_established()
1768 mutex_unlock(&clt->paths_mutex); in rtrs_rdma_conn_established()
1773 sess->hca_port = con->c.cm_id->port_num; in rtrs_rdma_conn_established()
1774 scnprintf(sess->hca_name, sizeof(sess->hca_name), in rtrs_rdma_conn_established()
1775 "%s", sess->s.dev->ib_dev->name); in rtrs_rdma_conn_established()
1776 sess->s.src_addr = con->c.cm_id->route.addr.src_addr; in rtrs_rdma_conn_established()
1778 sess->for_new_clt = 1; in rtrs_rdma_conn_established()
1786 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in flag_success_on_conn()
1788 atomic_inc(&sess->connected_cnt); in flag_success_on_conn()
1789 con->cm_err = 1; in flag_success_on_conn()
1795 struct rtrs_sess *s = con->c.sess; in rtrs_rdma_conn_rejected()
1801 status = ev->status; in rtrs_rdma_conn_rejected()
1802 rej_msg = rdma_reject_msg(con->c.cm_id, status); in rtrs_rdma_conn_rejected()
1803 msg = rdma_consumer_reject_data(con->c.cm_id, ev, &data_len); in rtrs_rdma_conn_rejected()
1806 errno = (int16_t)le16_to_cpu(msg->errno); in rtrs_rdma_conn_rejected()
1807 if (errno == -EBUSY) in rtrs_rdma_conn_rejected()
1820 return -ECONNRESET; in rtrs_rdma_conn_rejected()
1826 queue_work(rtrs_wq, &sess->close_work); in rtrs_clt_close_conns()
1828 flush_work(&sess->close_work); in rtrs_clt_close_conns()
1833 if (con->cm_err == 1) { in flag_error_on_conn()
1836 sess = to_clt_sess(con->c.sess); in flag_error_on_conn()
1837 if (atomic_dec_and_test(&sess->connected_cnt)) in flag_error_on_conn()
1839 wake_up(&sess->state_wq); in flag_error_on_conn()
1841 con->cm_err = cm_err; in flag_error_on_conn()
1847 struct rtrs_clt_con *con = cm_id->context; in rtrs_clt_rdma_cm_handler()
1848 struct rtrs_sess *s = con->c.sess; in rtrs_clt_rdma_cm_handler()
1852 switch (ev->event) { in rtrs_clt_rdma_cm_handler()
1867 wake_up(&sess->state_wq); in rtrs_clt_rdma_cm_handler()
1876 rtrs_wrn(s, "CM error event %d\n", ev->event); in rtrs_clt_rdma_cm_handler()
1877 cm_err = -ECONNRESET; in rtrs_clt_rdma_cm_handler()
1881 cm_err = -EHOSTUNREACH; in rtrs_clt_rdma_cm_handler()
1886 cm_err = -ECONNRESET; in rtrs_clt_rdma_cm_handler()
1895 rtrs_err(s, "Unexpected RDMA CM event (%d)\n", ev->event); in rtrs_clt_rdma_cm_handler()
1896 cm_err = -ECONNRESET; in rtrs_clt_rdma_cm_handler()
1914 struct rtrs_sess *s = con->c.sess; in create_cm()
1920 sess->s.dst_addr.ss_family == AF_IB ? in create_cm()
1928 con->c.cm_id = cm_id; in create_cm()
1929 con->cm_err = 0; in create_cm()
1936 err = rdma_resolve_addr(cm_id, (struct sockaddr *)&sess->s.src_addr, in create_cm()
1937 (struct sockaddr *)&sess->s.dst_addr, in create_cm()
1949 sess->state_wq, in create_cm()
1950 con->cm_err || sess->state != RTRS_CLT_CONNECTING, in create_cm()
1952 if (err == 0 || err == -ERESTARTSYS) { in create_cm()
1954 err = -ETIMEDOUT; in create_cm()
1958 if (con->cm_err < 0) { in create_cm()
1959 err = con->cm_err; in create_cm()
1962 if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTING) { in create_cm()
1964 err = -ECONNABORTED; in create_cm()
1982 struct rtrs_clt *clt = sess->clt; in rtrs_clt_sess_up()
1992 mutex_lock(&clt->paths_ev_mutex); in rtrs_clt_sess_up()
1993 up = ++clt->paths_up; in rtrs_clt_sess_up()
1999 if (up > MAX_PATHS_NUM && up == MAX_PATHS_NUM + clt->paths_num) in rtrs_clt_sess_up()
2000 clt->paths_up = clt->paths_num; in rtrs_clt_sess_up()
2002 clt->link_ev(clt->priv, RTRS_CLT_LINK_EV_RECONNECTED); in rtrs_clt_sess_up()
2003 mutex_unlock(&clt->paths_ev_mutex); in rtrs_clt_sess_up()
2006 sess->established = true; in rtrs_clt_sess_up()
2007 sess->reconnect_attempts = 0; in rtrs_clt_sess_up()
2008 sess->stats->reconnects.successful_cnt++; in rtrs_clt_sess_up()
2013 struct rtrs_clt *clt = sess->clt; in rtrs_clt_sess_down()
2015 if (!sess->established) in rtrs_clt_sess_down()
2018 sess->established = false; in rtrs_clt_sess_down()
2019 mutex_lock(&clt->paths_ev_mutex); in rtrs_clt_sess_down()
2020 WARN_ON(!clt->paths_up); in rtrs_clt_sess_down()
2021 if (--clt->paths_up == 0) in rtrs_clt_sess_down()
2022 clt->link_ev(clt->priv, RTRS_CLT_LINK_EV_DISCONNECTED); in rtrs_clt_sess_down()
2023 mutex_unlock(&clt->paths_ev_mutex); in rtrs_clt_sess_down()
2031 WARN_ON(READ_ONCE(sess->state) == RTRS_CLT_CONNECTED); in rtrs_clt_stop_and_destroy_conns()
2037 mutex_lock(&sess->init_mutex); in rtrs_clt_stop_and_destroy_conns()
2038 mutex_unlock(&sess->init_mutex); in rtrs_clt_stop_and_destroy_conns()
2055 for (cid = 0; cid < sess->s.con_num; cid++) { in rtrs_clt_stop_and_destroy_conns()
2056 if (!sess->s.con[cid]) in rtrs_clt_stop_and_destroy_conns()
2058 con = to_clt_con(sess->s.con[cid]); in rtrs_clt_stop_and_destroy_conns()
2073 wait_event_timeout(sess->state_wq, !atomic_read(&sess->connected_cnt), in rtrs_clt_stop_and_destroy_conns()
2076 for (cid = 0; cid < sess->s.con_num; cid++) { in rtrs_clt_stop_and_destroy_conns()
2077 if (!sess->s.con[cid]) in rtrs_clt_stop_and_destroy_conns()
2079 con = to_clt_con(sess->s.con[cid]); in rtrs_clt_stop_and_destroy_conns()
2099 struct rtrs_clt *clt = sess->clt; in rtrs_clt_remove_path_from_arr()
2104 mutex_lock(&clt->paths_mutex); in rtrs_clt_remove_path_from_arr()
2105 list_del_rcu(&sess->s.entry); in rtrs_clt_remove_path_from_arr()
2130 * [!CONNECTED path] clt->paths_num-- in rtrs_clt_remove_path_from_arr()
2132 * load clt->paths_num from 2 to 1 in rtrs_clt_remove_path_from_arr()
2137 * ends, because expression i < clt->paths_num is false. in rtrs_clt_remove_path_from_arr()
2139 clt->paths_num--; in rtrs_clt_remove_path_from_arr()
2146 next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry, in rtrs_clt_remove_path_from_arr()
2157 ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu); in rtrs_clt_remove_path_from_arr()
2159 lockdep_is_held(&clt->paths_mutex)) != sess) in rtrs_clt_remove_path_from_arr()
2184 mutex_unlock(&clt->paths_mutex); in rtrs_clt_remove_path_from_arr()
2190 struct rtrs_clt *clt = sess->clt; in rtrs_clt_add_path_to_arr()
2192 mutex_lock(&clt->paths_mutex); in rtrs_clt_add_path_to_arr()
2193 clt->paths_num++; in rtrs_clt_add_path_to_arr()
2195 list_add_tail_rcu(&sess->s.entry, &clt->paths_list); in rtrs_clt_add_path_to_arr()
2196 mutex_unlock(&clt->paths_mutex); in rtrs_clt_add_path_to_arr()
2205 cancel_delayed_work_sync(&sess->reconnect_dwork); in rtrs_clt_close_work()
2220 sess->s.recon_cnt++; in init_conns()
2223 for (cid = 0; cid < sess->s.con_num; cid++) { in init_conns()
2228 err = create_cm(to_clt_con(sess->s.con[cid])); in init_conns()
2230 destroy_con(to_clt_con(sess->s.con[cid])); in init_conns()
2243 while (cid--) { in init_conns()
2244 struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); in init_conns()
2263 struct rtrs_clt_con *con = cq->cq_context; in rtrs_clt_info_req_done()
2264 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_clt_info_req_done()
2267 iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); in rtrs_clt_info_req_done()
2268 rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); in rtrs_clt_info_req_done()
2270 if (unlikely(wc->status != IB_WC_SUCCESS)) { in rtrs_clt_info_req_done()
2271 rtrs_err(sess->clt, "Sess info request send failed: %s\n", in rtrs_clt_info_req_done()
2272 ib_wc_status_msg(wc->status)); in rtrs_clt_info_req_done()
2286 sg_cnt = le16_to_cpu(msg->sg_cnt); in process_info_rsp()
2288 return -EINVAL; in process_info_rsp()
2293 if (unlikely((ilog2(sg_cnt - 1) + 1) + in process_info_rsp()
2294 (ilog2(sess->chunk_size - 1) + 1) > in process_info_rsp()
2296 rtrs_err(sess->clt, in process_info_rsp()
2298 MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); in process_info_rsp()
2299 return -EINVAL; in process_info_rsp()
2301 if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { in process_info_rsp()
2302 rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", in process_info_rsp()
2304 return -EINVAL; in process_info_rsp()
2307 for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { in process_info_rsp()
2308 const struct rtrs_sg_desc *desc = &msg->desc[sgi]; in process_info_rsp()
2312 addr = le64_to_cpu(desc->addr); in process_info_rsp()
2313 rkey = le32_to_cpu(desc->key); in process_info_rsp()
2314 len = le32_to_cpu(desc->len); in process_info_rsp()
2318 if (unlikely(!len || (len % sess->chunk_size))) { in process_info_rsp()
2319 rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi, in process_info_rsp()
2321 return -EINVAL; in process_info_rsp()
2323 for ( ; len && i < sess->queue_depth; i++) { in process_info_rsp()
2324 sess->rbufs[i].addr = addr; in process_info_rsp()
2325 sess->rbufs[i].rkey = rkey; in process_info_rsp()
2327 len -= sess->chunk_size; in process_info_rsp()
2328 addr += sess->chunk_size; in process_info_rsp()
2332 if (unlikely(sgi != sg_cnt || i != sess->queue_depth)) { in process_info_rsp()
2333 rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n"); in process_info_rsp()
2334 return -EINVAL; in process_info_rsp()
2336 if (unlikely(total_len != sess->chunk_size * sess->queue_depth)) { in process_info_rsp()
2337 rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len); in process_info_rsp()
2338 return -EINVAL; in process_info_rsp()
2346 struct rtrs_clt_con *con = cq->cq_context; in rtrs_clt_info_rsp_done()
2347 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); in rtrs_clt_info_rsp_done()
2356 WARN_ON(con->c.cid); in rtrs_clt_info_rsp_done()
2357 iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); in rtrs_clt_info_rsp_done()
2358 if (unlikely(wc->status != IB_WC_SUCCESS)) { in rtrs_clt_info_rsp_done()
2359 rtrs_err(sess->clt, "Sess info response recv failed: %s\n", in rtrs_clt_info_rsp_done()
2360 ib_wc_status_msg(wc->status)); in rtrs_clt_info_rsp_done()
2363 WARN_ON(wc->opcode != IB_WC_RECV); in rtrs_clt_info_rsp_done()
2365 if (unlikely(wc->byte_len < sizeof(*msg))) { in rtrs_clt_info_rsp_done()
2366 rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", in rtrs_clt_info_rsp_done()
2367 wc->byte_len); in rtrs_clt_info_rsp_done()
2370 ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, in rtrs_clt_info_rsp_done()
2371 iu->size, DMA_FROM_DEVICE); in rtrs_clt_info_rsp_done()
2372 msg = iu->buf; in rtrs_clt_info_rsp_done()
2373 if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP)) { in rtrs_clt_info_rsp_done()
2374 rtrs_err(sess->clt, "Sess info response is malformed: type %d\n", in rtrs_clt_info_rsp_done()
2375 le16_to_cpu(msg->type)); in rtrs_clt_info_rsp_done()
2379 rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt); in rtrs_clt_info_rsp_done()
2380 if (unlikely(wc->byte_len < rx_sz)) { in rtrs_clt_info_rsp_done()
2381 rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", in rtrs_clt_info_rsp_done()
2382 wc->byte_len); in rtrs_clt_info_rsp_done()
2397 rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); in rtrs_clt_info_rsp_done()
2403 struct rtrs_clt_con *usr_con = to_clt_con(sess->s.con[0]); in rtrs_send_sess_info()
2413 sess->s.dev->ib_dev, DMA_TO_DEVICE, in rtrs_send_sess_info()
2415 rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev, in rtrs_send_sess_info()
2418 err = -ENOMEM; in rtrs_send_sess_info()
2422 err = rtrs_iu_post_recv(&usr_con->c, rx_iu); in rtrs_send_sess_info()
2424 rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err); in rtrs_send_sess_info()
2429 msg = tx_iu->buf; in rtrs_send_sess_info()
2430 msg->type = cpu_to_le16(RTRS_MSG_INFO_REQ); in rtrs_send_sess_info()
2431 memcpy(msg->sessname, sess->s.sessname, sizeof(msg->sessname)); in rtrs_send_sess_info()
2433 ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, in rtrs_send_sess_info()
2434 tx_iu->size, DMA_TO_DEVICE); in rtrs_send_sess_info()
2437 err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL); in rtrs_send_sess_info()
2439 rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err); in rtrs_send_sess_info()
2445 wait_event_interruptible_timeout(sess->state_wq, in rtrs_send_sess_info()
2446 sess->state != RTRS_CLT_CONNECTING, in rtrs_send_sess_info()
2449 if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) { in rtrs_send_sess_info()
2450 if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR) in rtrs_send_sess_info()
2451 err = -ECONNRESET; in rtrs_send_sess_info()
2453 err = -ETIMEDOUT; in rtrs_send_sess_info()
2459 rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); in rtrs_send_sess_info()
2461 rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); in rtrs_send_sess_info()
2470 * init_sess() - establishes all session connections and does handshake
2479 mutex_lock(&sess->init_mutex); in init_sess()
2482 rtrs_err(sess->clt, "init_conns(), err: %d\n", err); in init_sess()
2487 rtrs_err(sess->clt, "rtrs_send_sess_info(), err: %d\n", err); in init_sess()
2492 mutex_unlock(&sess->init_mutex); in init_sess()
2506 clt = sess->clt; in rtrs_clt_reconnect_work()
2508 if (READ_ONCE(sess->state) != RTRS_CLT_RECONNECTING) in rtrs_clt_reconnect_work()
2511 if (sess->reconnect_attempts >= clt->max_reconnect_attempts) { in rtrs_clt_reconnect_work()
2516 sess->reconnect_attempts++; in rtrs_clt_reconnect_work()
2531 sess->stats->reconnects.fail_cnt++; in rtrs_clt_reconnect_work()
2532 delay_ms = clt->reconnect_delay_sec * 1000; in rtrs_clt_reconnect_work()
2533 queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, in rtrs_clt_reconnect_work()
2544 mutex_destroy(&clt->paths_ev_mutex); in rtrs_clt_dev_release()
2545 mutex_destroy(&clt->paths_mutex); in rtrs_clt_dev_release()
2562 return ERR_PTR(-EINVAL); in alloc_clt()
2564 if (strlen(sessname) >= sizeof(clt->sessname)) in alloc_clt()
2565 return ERR_PTR(-EINVAL); in alloc_clt()
2569 return ERR_PTR(-ENOMEM); in alloc_clt()
2571 clt->pcpu_path = alloc_percpu(typeof(*clt->pcpu_path)); in alloc_clt()
2572 if (!clt->pcpu_path) { in alloc_clt()
2574 return ERR_PTR(-ENOMEM); in alloc_clt()
2577 clt->dev.class = rtrs_clt_dev_class; in alloc_clt()
2578 clt->dev.release = rtrs_clt_dev_release; in alloc_clt()
2579 uuid_gen(&clt->paths_uuid); in alloc_clt()
2580 INIT_LIST_HEAD_RCU(&clt->paths_list); in alloc_clt()
2581 clt->paths_num = paths_num; in alloc_clt()
2582 clt->paths_up = MAX_PATHS_NUM; in alloc_clt()
2583 clt->port = port; in alloc_clt()
2584 clt->pdu_sz = pdu_sz; in alloc_clt()
2585 clt->max_segments = max_segments; in alloc_clt()
2586 clt->max_segment_size = max_segment_size; in alloc_clt()
2587 clt->reconnect_delay_sec = reconnect_delay_sec; in alloc_clt()
2588 clt->max_reconnect_attempts = max_reconnect_attempts; in alloc_clt()
2589 clt->priv = priv; in alloc_clt()
2590 clt->link_ev = link_ev; in alloc_clt()
2591 clt->mp_policy = MP_POLICY_MIN_INFLIGHT; in alloc_clt()
2592 strlcpy(clt->sessname, sessname, sizeof(clt->sessname)); in alloc_clt()
2593 init_waitqueue_head(&clt->permits_wait); in alloc_clt()
2594 mutex_init(&clt->paths_ev_mutex); in alloc_clt()
2595 mutex_init(&clt->paths_mutex); in alloc_clt()
2596 device_initialize(&clt->dev); in alloc_clt()
2598 err = dev_set_name(&clt->dev, "%s", sessname); in alloc_clt()
2606 dev_set_uevent_suppress(&clt->dev, true); in alloc_clt()
2607 err = device_add(&clt->dev); in alloc_clt()
2611 clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); in alloc_clt()
2612 if (!clt->kobj_paths) { in alloc_clt()
2613 err = -ENOMEM; in alloc_clt()
2618 kobject_del(clt->kobj_paths); in alloc_clt()
2619 kobject_put(clt->kobj_paths); in alloc_clt()
2622 dev_set_uevent_suppress(&clt->dev, false); in alloc_clt()
2623 kobject_uevent(&clt->dev.kobj, KOBJ_ADD); in alloc_clt()
2627 device_del(&clt->dev); in alloc_clt()
2629 free_percpu(clt->pcpu_path); in alloc_clt()
2630 put_device(&clt->dev); in alloc_clt()
2636 free_percpu(clt->pcpu_path); in free_clt()
2641 device_unregister(&clt->dev); in free_clt()
2645 * rtrs_clt_open() - Open a session to an RTRS server
2651 * @pdu_sz: Size of extra payload which can be accessed after permit allocation.
2656 * up, 0 for disabled, -1 for forever
2676 clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, in rtrs_clt_open()
2677 ops->link_ev, in rtrs_clt_open()
2694 sess->for_new_clt = 1; in rtrs_clt_open()
2695 list_add_tail_rcu(&sess->s.entry, &clt->paths_list); in rtrs_clt_open()
2699 list_del_rcu(&sess->s.entry); in rtrs_clt_open()
2701 free_percpu(sess->stats->pcpu_stats); in rtrs_clt_open()
2702 kfree(sess->stats); in rtrs_clt_open()
2709 list_del_rcu(&sess->s.entry); in rtrs_clt_open()
2711 free_percpu(sess->stats->pcpu_stats); in rtrs_clt_open()
2712 kfree(sess->stats); in rtrs_clt_open()
2724 list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { in rtrs_clt_open()
2727 kobject_put(&sess->kobj); in rtrs_clt_open()
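A hedged consumer-side sketch of the open/close lifecycle. The argument list mirrors the parameters shown above for alloc_clt(); the exact rtrs_clt_open() prototype is an assumption and should be checked against the rtrs.h of the kernel in use. struct my_dev is hypothetical:

/* Sketch: open a session with one path, tear it down later with rtrs_clt_close(). */
static void my_link_ev(void *priv, enum rtrs_clt_link_ev ev)
{
	/* RTRS_CLT_LINK_EV_RECONNECTED or RTRS_CLT_LINK_EV_DISCONNECTED */
}

static struct rtrs_clt *open_session(struct my_dev *dev, const char *name,
				     const struct rtrs_addr *path, u16 port)
{
	struct rtrs_clt_ops ops = {
		.priv	 = dev,
		.link_ev = my_link_ev,
	};

	/*
	 * sessname, paths/paths_num, port, pdu_sz, reconnect_delay_sec,
	 * max_segments, max_segment_size, max_reconnect_attempts (-1 = forever)
	 */
	return rtrs_clt_open(&ops, name, path, 1, port,
			     64 /* pdu_sz: per-permit private area */,
			     5, 31, SZ_512K, -1);
}

/* ... later: rtrs_clt_close(clt); */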
2739 * rtrs_clt_close() - Close a session
2751 list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { in rtrs_clt_close()
2754 kobject_put(&sess->kobj); in rtrs_clt_close()
2764 int err = -EBUSY; in rtrs_clt_reconnect_from_sysfs()
2770 sess->reconnect_attempts = 0; in rtrs_clt_reconnect_from_sysfs()
2771 queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 0); in rtrs_clt_reconnect_from_sysfs()
2779 flush_delayed_work(&sess->reconnect_dwork); in rtrs_clt_reconnect_from_sysfs()
2780 err = (READ_ONCE(sess->state) == in rtrs_clt_reconnect_from_sysfs()
2781 RTRS_CLT_CONNECTED ? 0 : -ENOTCONN); in rtrs_clt_reconnect_from_sysfs()
2803 * 1. State was changed to DEAD - we were fast and nobody in rtrs_clt_remove_path_from_sysfs()
2806 * 2. State was observed as DEAD - we have someone in parallel in rtrs_clt_remove_path_from_sysfs()
2819 kobject_put(&sess->kobj); in rtrs_clt_remove_path_from_sysfs()
2827 clt->max_reconnect_attempts = (unsigned int)value; in rtrs_clt_set_max_reconnect_attempts()
2832 return (int)clt->max_reconnect_attempts; in rtrs_clt_get_max_reconnect_attempts()
2836 * rtrs_clt_request() - Request data transfer to/from server via RDMA.
2841 * @permit: Preallocated permit
2860 struct rtrs_clt *clt, struct rtrs_permit *permit, in rtrs_clt_request() argument
2868 int err = -ECONNABORTED, i; in rtrs_clt_request()
2887 (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { in rtrs_clt_request()
2888 if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) in rtrs_clt_request()
2891 if (unlikely(usr_len + hdr_len > sess->max_hdr_size)) { in rtrs_clt_request()
2892 rtrs_wrn_rl(sess->clt, in rtrs_clt_request()
2895 usr_len, hdr_len, sess->max_hdr_size); in rtrs_clt_request()
2896 err = -EMSGSIZE; in rtrs_clt_request()
2899 req = rtrs_clt_get_req(sess, ops->conf_fn, permit, ops->priv, in rtrs_clt_request()
2907 req->in_use = false; in rtrs_clt_request()
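A hedged end-to-end sketch of one transfer through rtrs_clt_request(): the permit comes from rtrs_clt_get_permit(), the kvec carries the small header/user portion, the scatterlist the bulk data, and conf_fn is called on completion. struct rtrs_clt_req_ops with .priv/.conf_fn is assumed from rtrs.h (matching the ops->conf_fn/ops->priv uses above), dir is assumed to take the kernel's READ/WRITE constants, and struct my_req is hypothetical:

/* Sketch: issue one READ and wait for its confirmation callback. */
struct my_req {
	struct completion	done;
	int			result;
};

static void my_conf_fn(void *priv, int errno)
{
	struct my_req *req = priv;

	req->result = errno;	/* 0 on success */
	complete(&req->done);
}

static int read_one(struct rtrs_clt *clt, struct rtrs_permit *permit,
		    struct my_req *req, void *hdr, size_t hdr_len,
		    struct scatterlist *sg, unsigned int sg_cnt,
		    size_t data_len)
{
	struct rtrs_clt_req_ops ops = {
		.priv	 = req,
		.conf_fn = my_conf_fn,
	};
	struct kvec vec = {
		.iov_base = hdr,
		.iov_len  = hdr_len,
	};
	int err;

	init_completion(&req->done);
	err = rtrs_clt_request(READ, &ops, clt, permit, &vec, 1,
			       data_len, sg, sg_cnt);
	if (err)
		return err;

	wait_for_completion(&req->done);
	return req->result;
}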
2921 * rtrs_clt_query() - queries RTRS session attributes
2926 * -ECOMM no connection to the server
2931 return -ECOMM; in rtrs_clt_query()
2933 attr->queue_depth = clt->queue_depth; in rtrs_clt_query()
2934 attr->max_io_size = clt->max_io_size; in rtrs_clt_query()
2935 attr->sess_kobj = &clt->dev.kobj; in rtrs_clt_query()
2936 strlcpy(attr->sessname, clt->sessname, sizeof(attr->sessname)); in rtrs_clt_query()
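A short sketch of rtrs_clt_query() usage; the struct rtrs_attrs fields follow the assignments directly above:

/* Sketch: read the negotiated session limits once a path is connected. */
static int log_session_limits(struct rtrs_clt *clt)
{
	struct rtrs_attrs attrs;
	int err;

	err = rtrs_clt_query(clt, &attrs);
	if (err)
		return err;	/* -ECOMM: no connected path yet */

	pr_info("%s: queue_depth %u, max_io_size %u\n", attrs.sessname,
		(unsigned int)attrs.queue_depth,
		(unsigned int)attrs.max_io_size);
	return 0;
}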
2948 sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments, in rtrs_clt_create_path_from_sysfs()
2949 clt->max_segment_size); in rtrs_clt_create_path_from_sysfs()
2973 free_percpu(sess->stats->pcpu_stats); in rtrs_clt_create_path_from_sysfs()
2974 kfree(sess->stats); in rtrs_clt_create_path_from_sysfs()
2982 if (!(dev->ib_dev->attrs.device_cap_flags & in rtrs_clt_ib_dev_init()
2985 return -ENOTSUPP; in rtrs_clt_ib_dev_init()
2999 rtrs_clt_dev_class = class_create(THIS_MODULE, "rtrs-client"); in rtrs_client_init()
3001 pr_err("Failed to create rtrs-client dev class\n"); in rtrs_client_init()
3007 return -ENOMEM; in rtrs_client_init()