diff --git a/kernel-headers/rdma/irdma-abi.h b/kernel-headers/rdma/irdma-abi.h index bb18f1548..9c8cee075 100644 --- a/kernel-headers/rdma/irdma-abi.h +++ b/kernel-headers/rdma/irdma-abi.h @@ -20,11 +20,15 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_MEM = 0, IRDMA_MEMREG_TYPE_QP = 1, IRDMA_MEMREG_TYPE_CQ = 2, + IRDMA_MEMREG_TYPE_SRQ = 3, }; enum { IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, + IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA = 1 << 2, + IRDMA_SUPPORT_WQE_FORMAT_V2 = 1 << 3, + IRDMA_SUPPORT_MAX_HW_PUSH_LEN = 1 << 4, }; struct irdma_alloc_ucontext_req { @@ -54,7 +58,8 @@ struct irdma_alloc_ucontext_resp { __u8 rsvd2; __aligned_u64 comp_mask; __u16 min_hw_wq_size; - __u8 rsvd3[6]; + __u32 max_hw_srq_quanta; + __u16 max_hw_push_len; }; struct irdma_alloc_pd_resp { @@ -71,6 +76,16 @@ struct irdma_create_cq_req { __aligned_u64 user_shadow_area; }; +struct irdma_create_srq_req { + __aligned_u64 user_srq_buf; + __aligned_u64 user_shadow_area; +}; + +struct irdma_create_srq_resp { + __u32 srq_id; + __u32 srq_size; +}; + struct irdma_create_qp_req { __aligned_u64 user_wqe_bufs; __aligned_u64 user_compl_ctx; diff --git a/providers/irdma/abi.h b/providers/irdma/abi.h index b98f354b5..0f87284c8 100644 --- a/providers/irdma/abi.h +++ b/providers/irdma/abi.h @@ -31,5 +31,7 @@ DECLARE_DRV_CMD(irdma_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, irdma_mem_reg_req, empty); DECLARE_DRV_CMD(irdma_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty, irdma_create_ah_resp); +DECLARE_DRV_CMD(irdma_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ, + irdma_create_srq_req, irdma_create_srq_resp); #endif /* PROVIDER_IRDMA_ABI_H */ diff --git a/providers/irdma/defs.h b/providers/irdma/defs.h index 8dd9d574c..81f34cdc6 100644 --- a/providers/irdma/defs.h +++ b/providers/irdma/defs.h @@ -19,14 +19,19 @@ #define IRDMA_QP_WQE_MIN_SIZE 32 #define IRDMA_QP_WQE_MAX_SIZE 256 #define IRDMA_QP_WQE_MIN_QUANTA 1 +#define IRDMA_DEFAULT_MAX_PUSH_LEN 8192 #define 
IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 -#define IRDMA_FEATURE_RTS_AE 1ULL -#define IRDMA_FEATURE_CQ_RESIZE 2ULL +#define IRDMA_FEATURE_RTS_AE BIT_ULL(0) +#define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1) +#define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5) +#define IRDMA_FEATURE_ATOMIC_OPS BIT_ULL(6) +#define IRDMA_FEATURE_SRQ BIT_ULL(7) +#define IRDMA_FEATURE_CQE_TIMESTAMPING BIT_ULL(8) #define IRDMAQP_OP_RDMA_WRITE 0x00 #define IRDMAQP_OP_RDMA_READ 0x01 #define IRDMAQP_OP_RDMA_SEND 0x03 @@ -38,6 +43,8 @@ #define IRDMAQP_OP_LOCAL_INVALIDATE 0x0a #define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b #define IRDMAQP_OP_NOP 0x0c +#define IRDMAQP_OP_ATOMIC_FETCH_ADD 0x0f +#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11 #define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0) #define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) @@ -58,6 +65,7 @@ #define IRDMA_CQ_ERROR BIT_ULL(55) #define IRDMA_CQ_SQ BIT_ULL(62) +#define IRDMA_CQ_SRQ BIT_ULL(52) #define IRDMA_CQ_VALID BIT_ULL(63) #define IRDMA_CQ_IMMVALID BIT_ULL(62) #define IRDMA_CQ_UDSMACVALID BIT_ULL(61) @@ -70,7 +78,7 @@ #define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) #define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) #define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0) -#define IRDMACQ_TCPSEQNUMRTT GENMASK_ULL(63, 32) +#define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS GENMASK_ULL(63, 32) #define IRDMACQ_INVSTAG GENMASK_ULL(31, 0) #define IRDMACQ_QPID GENMASK_ULL(55, 32) @@ -129,7 +137,7 @@ #define IRDMAQPSQ_MWSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX - +#define IRDMAQPSQ_REMOTE_ATOMICS_EN BIT_ULL(55) #define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0) /* iwarp QP RQ WQE common fields */ diff --git a/providers/irdma/irdma.h b/providers/irdma/irdma.h index 6491e3c75..845da8bba 100644 --- a/providers/irdma/irdma.h +++ b/providers/irdma/irdma.h @@ -6,9 +6,10 @@ #define IRDMA_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) enum irdma_vers { - IRDMA_GEN_RSVD, - IRDMA_GEN_1, - 
IRDMA_GEN_2, + IRDMA_GEN_RSVD = 0, + IRDMA_GEN_1 = 1, + IRDMA_GEN_2 = 2, + IRDMA_GEN_3 = 3, }; struct irdma_uk_attrs { @@ -20,6 +21,8 @@ struct irdma_uk_attrs { __u32 max_hw_wq_quanta; __u32 min_hw_cq_size; __u32 max_hw_cq_size; + __u32 max_hw_srq_quanta; + __u16 max_hw_push_len; __u16 max_hw_sq_chunk; __u16 min_hw_wq_size; __u8 hw_rev; diff --git a/providers/irdma/osdep.h b/providers/irdma/osdep.h index c9ca5e01e..9d5a37f0c 100644 --- a/providers/irdma/osdep.h +++ b/providers/irdma/osdep.h @@ -15,6 +15,7 @@ #include #include #include +#include #include static inline void db_wr32(__u32 val, __u32 *wqe_word) diff --git a/providers/irdma/uk.c b/providers/irdma/uk.c index b8f64b69a..d9055e5c3 100644 --- a/providers/irdma/uk.c +++ b/providers/irdma/uk.c @@ -53,6 +53,17 @@ static void irdma_set_fragment_gen_1(__le64 *wqe, __u32 offset, } } +/** + * irdma_nop_hdr - Format header section of noop WQE + * @qp: hw qp ptr + */ +static inline __u64 irdma_nop_hdr(struct irdma_qp_uk *qp) +{ + return FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, false) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); +} + /** * irdma_nop_1 - insert a NOP wqe * @qp: hw qp ptr @@ -135,24 +146,65 @@ static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, __u32 wqe_idx) qp->push_dropped = false; } +/** + * irdma_qp_push_wqe - setup push wqe and ring db + * @qp: hw qp ptr + * @wqe: wqe ptr + * @quanta: numbers of quanta in wqe + * @wqe_idx: wqe index + * @push_wqe: if to use push for the wqe + */ void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, __u16 quanta, - __u32 wqe_idx, bool post_sq) + __u32 wqe_idx, bool push_wqe) { __le64 *push; - if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != - IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && - !qp->push_mode) { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } else { + if (push_wqe) { push = (__le64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); - memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); + 
mmio_memcpy_x64(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); irdma_qp_ring_push_db(qp, wqe_idx); + qp->last_push_db = true; + } else if (qp->last_push_db) { + qp->last_push_db = false; + db_wr32(qp->qp_id, qp->wqe_alloc_db); + } else { + irdma_uk_qp_post_wr(qp); } } +/** + * irdma_push_ring_free - check if sq ring free to pust push wqe + * @qp: hw qp ptr + */ +static inline bool irdma_push_ring_free(struct irdma_qp_uk *qp) +{ + __u32 head, tail; + + head = IRDMA_RING_CURRENT_HEAD(qp->initial_ring); + tail = IRDMA_RING_CURRENT_TAIL(qp->sq_ring); + + if (head == tail || head == (tail + 1)) + return true; + + return false; +} + +/** + * irdma_enable_push_wqe - depending on sq ring and total size + * @qp: hw qp ptr + * @total_size: total data size + */ +static inline bool irdma_enable_push_wqe(struct irdma_qp_uk *qp, __u32 total_size) +{ + if (irdma_push_ring_free(qp) && + total_size <= qp->uk_attrs->max_hw_push_len) { + return true; + } + + return false; +} + /** * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go * @qp: hw qp ptr @@ -162,25 +214,31 @@ void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, __u16 quanta, * @info: info on WR */ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, __u32 *wqe_idx, - __u16 quanta, __u32 total_size, + __u16 *quanta, __u32 total_size, struct irdma_post_sq_info *info) { __le64 *wqe; __le64 *wqe_0 = NULL; __u32 nop_wqe_idx; + bool push_wqe_pad = false; + __u16 wqe_quanta = *quanta; __u16 avail_quanta; __u16 i; + if (qp->push_db && (*quanta & 0x1)) { + *quanta = *quanta + 1; + push_wqe_pad = true; + } avail_quanta = qp->uk_attrs->max_hw_sq_chunk - (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) % qp->uk_attrs->max_hw_sq_chunk); - if (quanta <= avail_quanta) { + if (*quanta <= avail_quanta) { /* WR fits in current chunk */ - if (quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) + if (*quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; } else { /* Need to pad with NOP */ - if (quanta + 
avail_quanta > + if (*quanta + avail_quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; @@ -198,17 +256,56 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, __u32 *wqe_idx, if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; - IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); + IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, *quanta); wqe = qp->sq_base[*wqe_idx].elem; - if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && quanta == 1 && + if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && *quanta == 1 && (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) { wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem; wqe_0[3] = htole64(FIELD_PREP(IRDMAQPSQ_VALID, !qp->swqe_polarity)); } qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; - qp->sq_wrtrk_array[*wqe_idx].quanta = quanta; + qp->sq_wrtrk_array[*wqe_idx].quanta = wqe_quanta; + + /* Push mode to WC memory requires multiples of 64-byte block writes. */ + if (push_wqe_pad) { + __le64 *push_wqe; + + nop_wqe_idx = *wqe_idx + wqe_quanta; + push_wqe = qp->sq_base[nop_wqe_idx].elem; + qp->sq_wrtrk_array[nop_wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA; + + set_64bit_val(push_wqe, 0, 0); + set_64bit_val(push_wqe, 8, 0); + set_64bit_val(push_wqe, 16, 0); + set_64bit_val(push_wqe, 24, irdma_nop_hdr(qp)); + } + + return wqe; +} + +/** + * irdma_srq_get_next_recv_wqe - get next srq's wqe + * @srq: hw srq ptr + * @wqe_idx: return wqe index + */ +__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, __u32 *wqe_idx) +{ + __le64 *wqe; + int ret_code; + + if (IRDMA_RING_FULL_ERR(srq->srq_ring)) + return NULL; + + IRDMA_ATOMIC_RING_MOVE_HEAD(srq->srq_ring, *wqe_idx, ret_code); + if (ret_code) + return NULL; + + if (!*wqe_idx) + srq->srwqe_polarity = !srq->srwqe_polarity; + + wqe = srq->srq_base[*wqe_idx * srq->wqe_size_multiplier].elem; return wqe; } @@ -257,7 +354,7 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool 
read_fence = false; __u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = false; op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) @@ -277,7 +374,10 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + if (qp->push_db) + info->push_wqe = irdma_enable_push_wqe(qp, total_size); + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOMEM; @@ -328,8 +428,8 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) { + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); } else { if (post_sq) irdma_uk_qp_post_wr(qp); @@ -338,6 +438,114 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, return 0; } +/** + * irdma_uk_atomic_fetch_add - atomic fetch and add operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) +{ + struct irdma_atomic_fetch_add *op_info; + __u32 total_size = 0; + __u16 quanta = 2; + __u32 wqe_idx; + __le64 *wqe; + __u64 hdr; + + info->push_wqe = qp->push_db ? 
true : false; + + op_info = &info->op.atomic_fetch_add; + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOMEM; + + set_64bit_val(wqe, 0, op_info->tagged_offset); + set_64bit_val(wqe, 8, + FIELD_PREP(IRDMAQPSQ_LOCSTAG, op_info->stag)); + set_64bit_val(wqe, 16, op_info->remote_tagged_offset); + + hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) | + FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->remote_stag) | + FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_FETCH_ADD) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + set_64bit_val(wqe, 32, op_info->fetch_add_data_bytes); + set_64bit_val(wqe, 40, 0); + set_64bit_val(wqe, 48, 0); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_atomic_compare_swap - atomic compare and swap operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) +{ + struct irdma_atomic_compare_swap *op_info; + __u32 total_size = 0; + __u16 quanta = 2; + __u32 wqe_idx; + __le64 *wqe; + __u64 hdr; + + info->push_wqe = qp->push_db ? 
true : false; + + op_info = &info->op.atomic_compare_swap; + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOMEM; + + set_64bit_val(wqe, 0, op_info->tagged_offset); + set_64bit_val(wqe, 8, + FIELD_PREP(IRDMAQPSQ_LOCSTAG, op_info->stag)); + set_64bit_val(wqe, 16, op_info->remote_tagged_offset); + + hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) | + FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->remote_stag) | + FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + set_64bit_val(wqe, 32, op_info->swap_data_bytes); + set_64bit_val(wqe, 40, op_info->compare_data_bytes); + set_64bit_val(wqe, 48, 0); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + /** * irdma_uk_rdma_read - rdma read command * @qp: hw qp ptr @@ -358,7 +566,7 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, __u16 quanta; __u64 hdr; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe &= qp->push_db ? 
true : false; op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) @@ -371,7 +579,7 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOMEM; @@ -415,7 +623,7 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { + if (qp->push_db) { irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); } else { if (post_sq) @@ -443,7 +651,7 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; __u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = false; op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) @@ -460,7 +668,10 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + if (qp->push_db) + info->push_wqe = irdma_enable_push_wqe(qp, total_size); + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOMEM; @@ -516,7 +727,7 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { + if (qp->push_db) { irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); } else { if (post_sq) @@ -708,7 +919,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, return EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 
total_size, info); if (!wqe) return ENOMEM; @@ -741,8 +952,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) { + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); } else { if (post_sq) irdma_uk_qp_post_wr(qp); @@ -781,7 +992,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, return EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOMEM; @@ -839,18 +1050,19 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { - __le64 *wqe; + __u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; struct irdma_inv_local_stag *op_info; - __u64 hdr; - __u32 wqe_idx; bool local_fence = false; struct ibv_sge sge = {}; + __le64 *wqe; + __u32 wqe_idx; + __u64 hdr; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.inv_local_stag; local_fence = info->local_fence; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); if (!wqe) return ENOMEM; @@ -873,9 +1085,9 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { + if (qp->push_db) { irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, - post_sq); + info->push_wqe); } else { if (post_sq) irdma_uk_qp_post_wr(qp); @@ -898,12 +1110,13 @@ int irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, __u64 hdr; __u32 wqe_idx; bool local_fence = false; + __u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; info->push_wqe = qp->push_db ? 
true : false; op_info = &info->op.bind_window; local_fence |= info->local_fence; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); if (!wqe) return ENOMEM; @@ -923,6 +1136,7 @@ int irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, op_info->remote_atomics_en) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ @@ -940,6 +1154,58 @@ int irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, return 0; } +/** + * irdma_uk_srq_post_receive - post a receive wqe to a shared rq + * @srq: shared rq ptr + * @info: post rq information + */ +int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq, + struct irdma_post_rq_info *info) +{ + __u32 wqe_idx, i, byte_off; + __u32 addl_frag_cnt; + __le64 *wqe; + __u64 hdr; + + if (srq->max_srq_frag_cnt < info->num_sges) + return EINVAL; + + wqe = irdma_srq_get_next_recv_wqe(srq, &wqe_idx); + if (!wqe) + return ENOMEM; + + addl_frag_cnt = info->num_sges > 1 ? 
info->num_sges - 1 : 0; + srq->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list, + srq->srwqe_polarity); + + for (i = 1, byte_off = 32; i < info->num_sges; i++) { + srq->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i], + srq->srwqe_polarity); + byte_off += 16; + } + + /* if not an odd number set valid bit in next fragment */ + if (srq->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) && + info->num_sges) { + srq->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + srq->srwqe_polarity); + if (srq->uk_attrs->hw_rev == IRDMA_GEN_2) + ++addl_frag_cnt; + } + + set_64bit_val(wqe, 16, (__u64)info->wr_id); + hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(IRDMAQPSQ_VALID, srq->srwqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + set_64bit_val(srq->shadow_area, 0, (wqe_idx + 1) % srq->srq_ring.size); + + return 0; +} + /** * irdma_uk_post_receive - post receive wqe * @qp: hw qp ptr @@ -1081,12 +1347,15 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; + struct irdma_srq_uk *srq; + struct qp_err_code qp_err; __u32 wqe_idx; int ret_code; bool move_cq_head = true; __u8 polarity; bool ext_valid; __le64 *ext_cqe; + __u8 is_srq; if (cq->avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); @@ -1152,23 +1421,52 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->ud_vlan_valid = false; } + get_64bit_val(cqe, 8, &comp_ctx); + qp = (struct irdma_qp_uk *)(uintptr_t)comp_ctx; + info->q_type = (__u8)FIELD_GET(IRDMA_CQ_SQ, qword3); + is_srq = (__u8)FIELD_GET(IRDMA_CQ_SRQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); + get_64bit_val(cqe, 8, &comp_ctx); + if (is_srq) + get_64bit_val(cqe, 40, (__u64 *)&qp); + else + qp = (struct irdma_qp_uk 
*)(uintptr_t)comp_ctx; if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); - if (info->major_err == IRDMA_FLUSH_MAJOR_ERR) { - info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + switch (info->major_err) { + case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR: + qp_err = irdma_ae_to_qp_err_code(info->minor_err); + info->minor_err = qp_err.flush_code; + SWITCH_FALLTHROUGH; + case IRDMA_FLUSH_MAJOR_ERR: /* Set the min error to standard flush error code for remaining cqes */ if (info->minor_err != FLUSH_GENERAL_ERR) { qword3 &= ~IRDMA_CQ_MINERR; qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR); set_64bit_val(cqe, 24, qword3); } - } else { - info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; + info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + break; + default: +#define IRDMA_CIE_SIGNATURE 0xE +#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12) + /* For UD SQ, certain non-fatal async errors are indicated through CQEs and need to be ignored */ + if (info->q_type == IRDMA_CQE_QTYPE_SQ && + qp->qp_type == IRDMA_QP_TYPE_ROCE_UD && + FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err) + == IRDMA_CIE_SIGNATURE) { + info->error = 0; + info->major_err = 0; + info->minor_err = 0; + info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; + } else { + info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; + } + break; } } else { info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; @@ -1177,14 +1475,11 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, get_64bit_val(cqe, 0, &qword0); get_64bit_val(cqe, 16, &qword2); - info->tcp_seq_num_rtt = (__u32)FIELD_GET(IRDMACQ_TCPSEQNUMRTT, qword0); + info->stat.raw = (__u32)FIELD_GET(IRDMACQ_TCPSQN_ROCEPSN_RTT_TS, qword0); info->qp_id = (__u32)FIELD_GET(IRDMACQ_QPID, qword2); info->ud_src_qpn = (__u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); - - get_64bit_val(cqe, 8, &comp_ctx); - info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); - qp = (struct irdma_qp_uk *)(uintptr_t)comp_ctx; + if 
(!qp || qp->destroy_pending) { ret_code = EFAULT; goto exit; @@ -1193,7 +1488,21 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->qp_handle = (irdma_qp_handle)(uintptr_t)qp; info->op_type = (__u8)FIELD_GET(IRDMACQ_OP, qword3); - if (info->q_type == IRDMA_CQE_QTYPE_RQ) { + if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) { + srq = qp->srq_uk; + + get_64bit_val(cqe, 8, &info->wr_id); + info->bytes_xfered = (__u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); + + if (qword3 & IRDMACQ_STAG) { + info->stag_invalid_set = true; + info->inv_stag = (__u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); + } else { + info->stag_invalid_set = false; + } + IRDMA_RING_MOVE_TAIL(srq->srq_ring); + pring = &srq->srq_ring; + } else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) { __u32 array_idx; array_idx = wqe_idx / qp->rq_wqe_size_multiplier; @@ -1289,9 +1598,15 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, ret_code = 0; exit: - if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) + if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { if (pring && IRDMA_RING_MORE_WORK(*pring)) - move_cq_head = false; + /* Park CQ head during a flush to generate additional CQEs + * from SW for all unprocessed WQEs. For GEN3 and beyond + * FW will generate/flush these CQEs so move to the next CQE + */ + move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ? 
+ false : true; + } if (move_cq_head) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); @@ -1409,6 +1724,25 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, return 0; } +/* + * irdma_get_srqdepth - get SRQ depth (quanta) + * @uk_attrs: qp HW attributes + * @srq_size: SRQ size + * @shift: shift which determines size of WQE + * @srqdepth: depth of SRQ + */ +int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, __u32 srq_size, __u8 shift, __u32 *srqdepth) +{ + *srqdepth = irdma_qp_round_up((srq_size << shift) + IRDMA_RQ_RSVD); + + if (*srqdepth < ((__u32)uk_attrs->min_hw_wq_size << shift)) + *srqdepth = uk_attrs->min_hw_wq_size << shift; + else if (*srqdepth > uk_attrs->max_hw_srq_quanta) + return EINVAL; + + return 0; +} + static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = { .iw_copy_inline_data = irdma_copy_inline_data, .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta, @@ -1444,6 +1778,42 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } +/** + * irdma_uk_srq_init - initialize shared qp + * @srq: hw srq (user and kernel) + * @info: srq initialization info + * + * initializes the vars used in both user and kernel mode. + * size of the wqe depends on numbers of max. fragements + * allowed. Then size of wqe * the number of wqes should be the + * amount of memory allocated for srq. 
+ */ +int irdma_uk_srq_init(struct irdma_srq_uk *srq, + struct irdma_srq_uk_init_info *info) +{ + __u8 rqshift; + + srq->uk_attrs = info->uk_attrs; + if (info->max_srq_frag_cnt > srq->uk_attrs->max_hw_wq_frags) + return EINVAL; + + irdma_get_wqe_shift(srq->uk_attrs, info->max_srq_frag_cnt, 0, &rqshift); + srq->srq_caps = info->srq_caps; + srq->srq_base = info->srq; + srq->shadow_area = info->shadow_area; + srq->srq_id = info->srq_id; + srq->srwqe_polarity = 0; + srq->srq_size = info->srq_size; + srq->wqe_size = rqshift; + srq->max_srq_frag_cnt = min(srq->uk_attrs->max_hw_wq_frags, + ((__u32)2 << rqshift) - 1); + IRDMA_RING_INIT(srq->srq_ring, srq->srq_size); + srq->wqe_size_multiplier = 1 << rqshift; + srq->wqe_ops = iw_wqe_uk_ops; + + return 0; +} + /** * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. * @ukinfo: qp initialization info @@ -1547,6 +1917,8 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, qp->wqe_ops = iw_wqe_uk_ops_gen_1; else qp->wqe_ops = iw_wqe_uk_ops; + qp->srq_uk = info->srq_uk; + return ret_code; } @@ -1618,10 +1990,11 @@ int irdma_nop(struct irdma_qp_uk *qp, __u64 wr_id, bool signaled, bool post_sq) __u64 hdr; __u32 wqe_idx; struct irdma_post_sq_info info = {}; + __u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; info.push_wqe = false; info.wr_id = wr_id; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, &info); if (!wqe) return ENOMEM; diff --git a/providers/irdma/umain.c b/providers/irdma/umain.c index a22ec1a76..74f498666 100644 --- a/providers/irdma/umain.c +++ b/providers/irdma/umain.c @@ -86,20 +86,25 @@ static const struct verbs_context_ops irdma_uctx_ops = { .create_cq = irdma_ucreate_cq, .create_cq_ex = irdma_ucreate_cq_ex, .create_qp = irdma_ucreate_qp, + .create_srq = irdma_ucreate_srq, .dealloc_mw = irdma_udealloc_mw, .dealloc_pd = irdma_ufree_pd, .dereg_mr = irdma_udereg_mr, .destroy_ah = irdma_udestroy_ah, .destroy_cq = 
irdma_udestroy_cq, .destroy_qp = irdma_udestroy_qp, + .destroy_srq = irdma_udestroy_srq, .detach_mcast = irdma_udetach_mcast, .modify_qp = irdma_umodify_qp, + .modify_srq = irdma_umodify_srq, .poll_cq = irdma_upoll_cq, .post_recv = irdma_upost_recv, .post_send = irdma_upost_send, + .post_srq_recv = irdma_upost_srq, .query_device_ex = irdma_uquery_device_ex, .query_port = irdma_uquery_port, .query_qp = irdma_uquery_qp, + .query_srq = irdma_uquery_srq, .reg_dmabuf_mr = irdma_ureg_mr_dmabuf, .reg_mr = irdma_ureg_mr, .rereg_mr = irdma_urereg_mr, @@ -154,6 +159,7 @@ static struct verbs_context *irdma_ualloc_context(struct ibv_device *ibdev, return NULL; cmd.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; + cmd.comp_mask |= IRDMA_SUPPORT_WQE_FORMAT_V2; cmd.userspace_ver = user_ver; if (ibv_cmd_get_context(&iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd, sizeof(cmd), @@ -192,6 +198,12 @@ static struct verbs_context *irdma_ualloc_context(struct ibv_device *ibdev, iwvctx->uk_attrs.min_hw_wq_size = resp.min_hw_wq_size; else iwvctx->uk_attrs.min_hw_wq_size = IRDMA_QP_SW_MIN_WQSIZE; + if (resp.comp_mask & IRDMA_SUPPORT_MAX_HW_PUSH_LEN) + iwvctx->uk_attrs.max_hw_push_len = resp.max_hw_push_len; + else + iwvctx->uk_attrs.max_hw_push_len = IRDMA_DEFAULT_MAX_PUSH_LEN; + + iwvctx->uk_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta; mmap_key = resp.db_mmap_key; } diff --git a/providers/irdma/umain.h b/providers/irdma/umain.h index a69c82b1a..f40ffb641 100644 --- a/providers/irdma/umain.h +++ b/providers/irdma/umain.h @@ -21,15 +21,14 @@ #define IRDMA_DB_CQ_OFFSET 64 enum irdma_supported_wc_flags { - IRDMA_CQ_SUPPORTED_WC_FLAGS = IBV_WC_EX_WITH_BYTE_LEN + IRDMA_STANDARD_WC_FLAGS_EX = IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM | IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP | IBV_WC_EX_WITH_SLID - | IBV_WC_EX_WITH_SL - | IBV_WC_EX_WITH_DLID_PATH_BITS - | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK - | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, + | IBV_WC_EX_WITH_SL, + 
IRDMA_GEN3_WC_FLAGS_EX = IRDMA_STANDARD_WC_FLAGS_EX | + IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, }; struct irdma_udevice { @@ -65,6 +64,15 @@ struct irdma_cq_buf { struct list_node list; struct irdma_cq_uk cq; struct verbs_mr vmr; + size_t buf_size; +}; + +struct irdma_usrq { + struct verbs_srq v_srq; + struct verbs_mr vmr; + pthread_spinlock_t lock; + struct irdma_srq_uk srq; + size_t buf_size; }; struct irdma_ucq { @@ -158,6 +166,14 @@ int irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); +struct ibv_srq *irdma_ucreate_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *initattr); +int irdma_udestroy_srq(struct ibv_srq *ibsrq); +int irdma_uquery_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr); +int irdma_umodify_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr, + int attr_mask); +int irdma_upost_srq(struct ibv_srq *ib_srq, struct ibv_recv_wr *ib_wr, + struct ibv_recv_wr **bad_wr); void irdma_async_event(struct ibv_context *context, struct ibv_async_event *event); void irdma_set_hw_attrs(struct irdma_hw_attrs *attrs); diff --git a/providers/irdma/user.h b/providers/irdma/user.h index 9fa73c4e2..0d9701b75 100644 --- a/providers/irdma/user.h +++ b/providers/irdma/user.h @@ -43,10 +43,113 @@ #define IRDMA_OP_TYPE_INV_STAG 0x0a #define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b #define IRDMA_OP_TYPE_NOP 0x0c +#define IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD 0x0f +#define IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP 0x11 #define IRDMA_OP_TYPE_REC 0x3e #define IRDMA_OP_TYPE_REC_IMM 0x3f #define IRDMA_FLUSH_MAJOR_ERR 1 +#define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe +/* Async Events codes */ +#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102 +#define IRDMA_AE_AMP_INVALID_STAG 0x0103 +#define IRDMA_AE_AMP_BAD_QP 0x0104 +#define IRDMA_AE_AMP_BAD_PD 0x0105 +#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106 +#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107 +#define IRDMA_AE_AMP_BOUNDS_VIOLATION 
0x0108 +#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109 +#define IRDMA_AE_AMP_TO_WRAP 0x010a +#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c +#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d +#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e +#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110 +#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111 +#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112 +#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113 +#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114 +#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115 +#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116 +#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117 +#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118 +#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119 +#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a +#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b +#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c +#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d +#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e +#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f +#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120 +#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121 +#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132 +#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133 +#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134 +#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135 +#define IRDMA_AE_BAD_CLOSE 0x0201 +#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202 +#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203 +#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205 +#define IRDMA_AE_STAG_ZERO_INVALID 0x0206 +#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207 +#define IRDMA_AE_IB_INVALID_REQUEST 0x0208 +#define IRDMA_AE_SRQ_LIMIT 0x0209 +#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a +#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b +#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c +#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d +#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e +#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f +#define 
IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 +#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221 +#define IRDMA_AE_ATOMIC_MASK 0x0222 +#define IRDMA_AE_INVALID_REQUEST 0x0223 +#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224 +#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 +#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 +#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 +#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305 +#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306 +#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307 +#define IRDMA_AE_DDP_NO_L_BIT 0x0308 +#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311 +#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312 +#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313 +#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314 +#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316 +#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380 +#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381 +#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382 +#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383 +#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401 +#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402 +#define IRDMA_AE_STALE_ARP_ENTRY 0x0403 +#define IRDMA_AE_INVALID_AH_ENTRY 0x0406 +#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501 +#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502 +#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503 +#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504 +#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505 +#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507 +#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508 +#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509 +#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a +#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b +#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c +#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f +#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 +#define IRDMA_AE_RESET_SENT 0x0601 +#define IRDMA_AE_TERMINATE_SENT 0x0602 +#define IRDMA_AE_RESET_NOT_SENT 0x0603 
+#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700 +#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 +#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 +#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703 +#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704 +#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705 +#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 +#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901 +#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B enum irdma_device_caps_const { IRDMA_WQE_SIZE = 4, @@ -106,8 +209,15 @@ enum irdma_flush_opcode { FLUSH_RETRY_EXC_ERR, FLUSH_MW_BIND_ERR, FLUSH_REM_INV_REQ_ERR, + FLUSH_RNR_RETRY_EXC_ERR, }; +enum irdma_qp_event_type { + IRDMA_QP_EVENT_CATASTROPHIC, + IRDMA_QP_EVENT_ACCESS_ERR, + IRDMA_QP_EVENT_REQ_ERR, + }; + enum irdma_cmpl_status { IRDMA_COMPL_STATUS_SUCCESS = 0, IRDMA_COMPL_STATUS_FLUSHED, @@ -148,6 +258,8 @@ enum irdma_qp_caps { IRDMA_PUSH_MODE = 8, }; +struct irdma_srq_uk; +struct irdma_srq_uk_init_info; struct irdma_qp_uk; struct irdma_cq_uk; struct irdma_qp_uk_init_info; @@ -202,6 +314,24 @@ struct irdma_bind_window { bool ena_writes:1; irdma_stag mw_stag; bool mem_window_type_1:1; + bool remote_atomics_en:1; +}; + +struct irdma_atomic_fetch_add { + __u64 tagged_offset; + __u64 remote_tagged_offset; + __u64 fetch_add_data_bytes; + __u32 stag; + __u32 remote_stag; +}; + +struct irdma_atomic_compare_swap { + __u64 tagged_offset; + __u64 remote_tagged_offset; + __u64 swap_data_bytes; + __u64 compare_data_bytes; + __u32 stag; + __u32 remote_stag; }; struct irdma_inv_local_stag { @@ -221,6 +351,7 @@ struct irdma_post_sq_info { bool report_rtt:1; bool udp_hdr:1; bool defer_flag:1; + bool remote_atomic_en:1; __u32 imm_data; __u32 stag_to_inv; union { @@ -229,6 +360,8 @@ struct irdma_post_sq_info { struct irdma_rdma_read rdma_read; struct irdma_bind_window bind_window; struct irdma_inv_local_stag inv_local_stag; + struct irdma_atomic_fetch_add atomic_fetch_add; + struct irdma_atomic_compare_swap atomic_compare_swap; } op; }; @@ -236,7 +369,6 @@ struct 
irdma_cq_poll_info { __u64 wr_id; irdma_qp_handle qp_handle; __u32 bytes_xfered; - __u32 tcp_seq_num_rtt; __u32 qp_id; __u32 ud_src_qpn; __u32 imm_data; @@ -256,8 +388,24 @@ struct irdma_cq_poll_info { bool ud_vlan_valid:1; bool ud_smac_valid:1; bool imm_valid:1; + union { + __u32 tcp_sqn; + __u32 roce_psn; + __u32 rtt; + __u32 timestamp; + __u32 raw; + } stat; +}; + +struct qp_err_code { + enum irdma_flush_opcode flush_code; + enum irdma_qp_event_type event_type; }; +int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_inline_send(struct irdma_qp_uk *qp, @@ -303,6 +451,38 @@ int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, __u32 *sq_depth, __u8 *sq_shift); int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, __u32 *rq_depth, __u8 *rq_shift); +int irdma_uk_srq_init(struct irdma_srq_uk *srq, + struct irdma_srq_uk_init_info *info); +int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq, + struct irdma_post_rq_info *info); + +struct irdma_srq_uk { + __u32 srq_caps; + struct irdma_qp_quanta *srq_base; + struct irdma_uk_attrs *uk_attrs; + __le64 *shadow_area; + struct irdma_ring srq_ring; + struct irdma_ring initial_ring; + __u32 srq_id; + __u32 srq_size; + __u32 max_srq_frag_cnt; + struct irdma_wqe_uk_ops wqe_ops; + __u8 srwqe_polarity; + __u8 wqe_size; + __u8 wqe_size_multiplier; + __u8 deferred_flag; +}; + +struct irdma_srq_uk_init_info { + struct irdma_qp_quanta *srq; + struct irdma_uk_attrs *uk_attrs; + __le64 *shadow_area; + __u64 *srq_wrid_array; + __u32 srq_id; + __u32 srq_caps; + __u32 srq_size; + __u32 max_srq_frag_cnt; +}; struct irdma_sq_uk_wr_trk_info { __u64 wrid; @@ -318,6 +498,7 @@ struct irdma_qp_quanta { struct irdma_qp_uk { struct 
irdma_qp_quanta *sq_base; struct irdma_qp_quanta *rq_base; + struct irdma_srq_uk *srq_uk; struct irdma_uk_attrs *uk_attrs; __u32 *wqe_alloc_db; struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; @@ -350,6 +531,7 @@ struct irdma_qp_uk { bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ + bool last_push_db:1; /* Indicates last DB was push DB */ void *back_qp; pthread_spinlock_t *lock; __u8 dbg_rq_flushed; @@ -372,6 +554,7 @@ struct irdma_cq_uk { struct irdma_qp_uk_init_info { struct irdma_qp_quanta *sq; struct irdma_qp_quanta *rq; + struct irdma_srq_uk *srq_uk; struct irdma_uk_attrs *uk_attrs; __u32 *wqe_alloc_db; __le64 *shadow_area; @@ -405,8 +588,9 @@ struct irdma_cq_uk_init_info { }; __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, __u32 *wqe_idx, - __u16 quanta, __u32 total_size, + __u16 *quanta, __u32 total_size, struct irdma_post_sq_info *info); +__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, __u32 *wqe_idx); __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, __u32 *wqe_idx); void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); int irdma_nop(struct irdma_qp_uk *qp, __u64 wr_id, bool signaled, bool post_sq); @@ -415,10 +599,90 @@ int irdma_fragcnt_to_wqesize_rq(__u32 frag_cnt, __u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, __u32 sge, __u32 inline_data, __u8 *shift); int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, __u32 sq_size, - __u8 shift, __u32 *wqdepth); + __u8 shift, __u32 *sqdepth); int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, __u32 rq_size, - __u8 shift, __u32 *wqdepth); + __u8 shift, __u32 *rqdepth); +int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, __u32 srq_size, + __u8 shift, __u32 *srqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, __u16 quanta, - 
__u32 wqe_idx, bool post_sq); + __u32 wqe_idx, bool push_wqe); void irdma_clr_wqes(struct irdma_qp_uk *qp, __u32 qp_wqe_idx); + +static inline struct qp_err_code irdma_ae_to_qp_err_code(__u16 ae_id) +{ + struct qp_err_code qp_err = {}; + + switch (ae_id) { + case IRDMA_AE_AMP_BOUNDS_VIOLATION: + case IRDMA_AE_AMP_INVALID_STAG: + case IRDMA_AE_AMP_RIGHTS_VIOLATION: + case IRDMA_AE_AMP_UNALLOCATED_STAG: + case IRDMA_AE_AMP_BAD_PD: + case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_AMP_BAD_STAG_KEY: + case IRDMA_AE_AMP_BAD_STAG_INDEX: + case IRDMA_AE_AMP_TO_WRAP: + case IRDMA_AE_PRIV_OPERATION_DENIED: + qp_err.flush_code = FLUSH_PROT_ERR; + qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; + break; + case IRDMA_AE_UDA_XMIT_BAD_PD: + case IRDMA_AE_WQE_UNEXPECTED_OPCODE: + qp_err.flush_code = FLUSH_LOC_QP_OP_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: + case IRDMA_AE_UDA_L4LEN_INVALID: + case IRDMA_AE_DDP_UBE_INVALID_MO: + case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + qp_err.flush_code = FLUSH_LOC_LEN_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: + case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: + qp_err.flush_code = FLUSH_REM_ACCESS_ERR; + qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; + break; + case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: + case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: + case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: + case IRDMA_AE_AMP_MWBIND_VALID_STAG: + qp_err.flush_code = FLUSH_MW_BIND_ERR; + qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; + break; + case IRDMA_AE_LLP_TOO_MANY_RETRIES: + qp_err.flush_code = FLUSH_RETRY_EXC_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_IB_INVALID_REQUEST: + qp_err.flush_code = FLUSH_REM_INV_REQ_ERR; + qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR; + break; + case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: + case 
IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: + case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: + case IRDMA_AE_IB_REMOTE_OP_ERROR: + qp_err.flush_code = FLUSH_REM_OP_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_LLP_TOO_MANY_RNRS: + qp_err.flush_code = FLUSH_RNR_RETRY_EXC_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_LCE_QP_CATASTROPHIC: + case IRDMA_AE_REMOTE_QP_CATASTROPHIC: + case IRDMA_AE_LOCAL_QP_CATASTROPHIC: + case IRDMA_AE_RCE_QP_CATASTROPHIC: + qp_err.flush_code = FLUSH_FATAL_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + default: + qp_err.flush_code = FLUSH_GENERAL_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + } + + return qp_err; +} #endif /* IRDMA_USER_H */ diff --git a/providers/irdma/uverbs.c b/providers/irdma/uverbs.c index 649428f59..511725af7 100644 --- a/providers/irdma/uverbs.c +++ b/providers/irdma/uverbs.c @@ -222,11 +222,6 @@ struct ibv_mw *irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) struct ibv_alloc_mw cmd; struct ib_uverbs_alloc_mw_resp resp; - if (type != IBV_MW_TYPE_1) { - errno = ENOTSUP; - return NULL; - } - mw = calloc(1, sizeof(*mw)); if (!mw) return NULL; @@ -322,17 +317,191 @@ static void irdma_free_hw_buf(void *buf, size_t size) free(buf); } +/** + * irdma_uquery_srq - query srq + * @ibsrq: ib srq structure + * @attr: srq attributes to fill in + */ +int irdma_uquery_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr) +{ + struct ibv_query_srq cmd; + + return ibv_cmd_query_srq(ibsrq, attr, &cmd, sizeof(cmd)); +} + +/** + * irdma_umodify_srq - modify srq + * @ibsrq: ib srq structure + * @attr: srq attributes to use + * @attr_mask: mask of the attributes + */ +int irdma_umodify_srq(struct ibv_srq *ibsrq, + struct ibv_srq_attr *attr, + int attr_mask) +{ + struct ibv_modify_srq cmd; + + return ibv_cmd_modify_srq(ibsrq, attr, attr_mask, &cmd, sizeof(cmd)); +} + +/** + * irdma_udestroy_srq - destroy srq + * @ibsrq: ib srq structure + 
*/ +int irdma_udestroy_srq(struct ibv_srq *ibsrq) +{ + struct irdma_usrq *iwusrq; + struct verbs_srq *vsrq; + int ret; + + vsrq = container_of(ibsrq, struct verbs_srq, srq); + iwusrq = container_of(vsrq, struct irdma_usrq, v_srq); + + ret = pthread_spin_destroy(&iwusrq->lock); + if (ret) + goto err; + + ret = ibv_cmd_destroy_srq(ibsrq); + if (ret) + return ret; + + ibv_cmd_dereg_mr(&iwusrq->vmr); + irdma_free_hw_buf(iwusrq->srq.srq_base, iwusrq->buf_size); + free(iwusrq); + return 0; +err: + return ret; +} + +/** + * irdma_ucreate_srq - create srq on user app + * @pd: pd for the qp + * @initattr: attributes of the srq to be created + */ +struct ibv_srq *irdma_ucreate_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *initattr) +{ + struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; + struct irdma_srq_uk_init_info info = {}; + struct irdma_ucreate_srq_resp resp = {}; + struct irdma_ureg_mr reg_mr_cmd = {}; + struct irdma_ucreate_srq cmd = {}; + struct irdma_uk_attrs *uk_attrs; + struct irdma_uvcontext *iwvctx; + struct irdma_usrq *iwusrq; + struct ibv_srq_attr *attr; + size_t total_size; + size_t size; + __u32 depth; + __u8 shift; + int ret; + + iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx.context); + uk_attrs = &iwvctx->uk_attrs; + attr = &initattr->attr; + + if (!(uk_attrs->feature_flags & IRDMA_FEATURE_SRQ)) { + errno = EOPNOTSUPP; + return NULL; + } + + if (attr->max_sge > uk_attrs->max_hw_wq_frags || + attr->max_wr > uk_attrs->max_hw_srq_quanta) { + errno = EINVAL; + return NULL; + } + + irdma_get_wqe_shift(uk_attrs, attr->max_sge, 0, &shift); + + ret = irdma_get_srqdepth(uk_attrs, attr->max_wr, shift, &depth); + if (ret) { + errno = ret; + return NULL; + } + + iwusrq = calloc(1, sizeof(*iwusrq)); + if (!iwusrq) + return NULL; + + ret = pthread_spin_init(&iwusrq->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) + goto err_lock; + + info.uk_attrs = uk_attrs; + info.max_srq_frag_cnt = attr->max_sge; + + size = roundup(depth * IRDMA_QP_WQE_MIN_SIZE, 
IRDMA_HW_PAGE_SIZE); + total_size = size + IRDMA_DB_SHADOW_AREA_SIZE; + iwusrq->buf_size = total_size; + info.srq = irdma_calloc_hw_buf(total_size); + + if (!info.srq) { + ret = ENOMEM; + goto err_sges; + } + + memset(info.srq, 0, total_size); + reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_SRQ; + reg_mr_cmd.rq_pages = size >> IRDMA_HW_PAGE_SHIFT; + + ret = ibv_cmd_reg_mr(pd, info.srq, total_size, + (uintptr_t)info.srq, IBV_ACCESS_LOCAL_WRITE, + &iwusrq->vmr, &reg_mr_cmd.ibv_cmd, + sizeof(reg_mr_cmd), &reg_mr_resp, + sizeof(reg_mr_resp)); + if (ret) + goto err_cmd_reg; + + iwusrq->vmr.ibv_mr.pd = pd; + info.shadow_area = (__le64 *)((__u8 *)info.srq + size); + + cmd.user_srq_buf = (__u64)((uintptr_t)info.srq); + cmd.user_shadow_area = (__u64)((uintptr_t)info.shadow_area); + ret = ibv_cmd_create_srq(pd, &iwusrq->v_srq.srq, initattr, &cmd.ibv_cmd, + sizeof(cmd), &resp.ibv_resp, sizeof(resp)); + if (ret) + goto err_create_srq; + + info.uk_attrs = uk_attrs; + info.max_srq_frag_cnt = attr->max_sge; + info.srq_id = resp.srq_id; + info.srq_size = resp.srq_size; + + ret = irdma_uk_srq_init(&iwusrq->srq, &info); + if (ret) + goto err_srq_init; + + attr->max_wr = (depth - IRDMA_RQ_RSVD) >> shift; + + return &iwusrq->v_srq.srq; + +err_srq_init: + ibv_cmd_destroy_srq(&iwusrq->v_srq.srq); +err_create_srq: + ibv_cmd_dereg_mr(&iwusrq->vmr); +err_cmd_reg: + irdma_free_hw_buf(info.srq, total_size); +err_sges: + pthread_spin_destroy(&iwusrq->lock); +err_lock: + free(iwusrq); + + errno = ret; + return NULL; +} + /** * get_cq_size - returns actual cqe needed by HW * @ncqe: minimum cqes requested by application * @hw_rev: HW generation + * @cqe_64byte_ena: enable 64byte cqe */ -static inline int get_cq_size(int ncqe, __u8 hw_rev) +static inline int get_cq_size(int ncqe, __u8 hw_rev, bool cqe_64byte_ena) { ncqe++; - /* Completions with immediate require 1 extra entry */ - if (hw_rev > IRDMA_GEN_1) + if (!cqe_64byte_ena && hw_rev > IRDMA_GEN_1) + /* Completions with immediate require 1 extra entry */ 
ncqe *= 2; if (ncqe < IRDMA_U_MINCQ_SIZE) @@ -341,9 +510,12 @@ static inline int get_cq_size(int ncqe, __u8 hw_rev) return ncqe; } -static inline size_t get_cq_total_bytes(__u32 cq_size) +static inline size_t get_cq_total_bytes(__u32 cq_size, bool cqe_64byte_ena) { - return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); + if (cqe_64byte_ena) + return roundup(cq_size * sizeof(struct irdma_extended_cqe), IRDMA_HW_PAGE_SIZE); + else + return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); } /** @@ -370,14 +542,20 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, __u32 cq_pages; int ret, ncqe; __u8 hw_rev; + bool cqe_64byte_ena; iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx.context); uk_attrs = &iwvctx->uk_attrs; hw_rev = uk_attrs->hw_rev; - if (ext_cq && hw_rev == IRDMA_GEN_1) { - errno = EOPNOTSUPP; - return NULL; + if (ext_cq) { + __u32 supported_flags = hw_rev >= IRDMA_GEN_3 ? + IRDMA_GEN3_WC_FLAGS_EX : IRDMA_STANDARD_WC_FLAGS_EX; + + if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) { + errno = EOPNOTSUPP; + return NULL; + } } if (attr_ex->cqe < IRDMA_MIN_CQ_SIZE || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) { @@ -396,10 +574,10 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, return NULL; } - info.cq_size = get_cq_size(attr_ex->cqe, hw_rev); - iwucq->comp_vector = attr_ex->comp_vector; + cqe_64byte_ena = (uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE) ? 
true : false; + info.cq_size = get_cq_size(attr_ex->cqe, hw_rev, cqe_64byte_ena); list_head_init(&iwucq->resize_list); - total_size = get_cq_total_bytes(info.cq_size); + total_size = get_cq_total_bytes(info.cq_size, cqe_64byte_ena); cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT; if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE)) @@ -468,6 +646,8 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, info.cq_id = resp.cq_id; /* Do not report the cqe's burned by HW */ iwucq->verbs_cq.cq.cqe = ncqe; + if (cqe_64byte_ena) + info.avoid_mem_cflct = true; info.cqe_alloc_db = (__u32 *)((__u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET); irdma_uk_cq_init(&iwucq->cq, &info); @@ -509,11 +689,6 @@ struct ibv_cq *irdma_ucreate_cq(struct ibv_context *context, int cqe, struct ibv_cq_ex *irdma_ucreate_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *attr_ex) { - if (attr_ex->wc_flags & ~IRDMA_CQ_SUPPORTED_WC_FLAGS) { - errno = EOPNOTSUPP; - return NULL; - } - return ucreate_cq(context, attr_ex, true); } @@ -524,7 +699,7 @@ struct ibv_cq_ex *irdma_ucreate_cq_ex(struct ibv_context *context, static void irdma_free_cq_buf(struct irdma_cq_buf *cq_buf) { ibv_cmd_dereg_mr(&cq_buf->vmr); - irdma_free_hw_buf(cq_buf->cq.cq_base, get_cq_total_bytes(cq_buf->cq.cq_size)); + irdma_free_hw_buf(cq_buf->cq.cq_base, cq_buf->buf_size); free(cq_buf); } @@ -605,12 +780,14 @@ static enum ibv_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcod return IBV_WC_LOC_LEN_ERR; case FLUSH_GENERAL_ERR: return IBV_WC_WR_FLUSH_ERR; - case FLUSH_RETRY_EXC_ERR: - return IBV_WC_RETRY_EXC_ERR; case FLUSH_MW_BIND_ERR: return IBV_WC_MW_BIND_ERR; case FLUSH_REM_INV_REQ_ERR: return IBV_WC_REM_INV_REQ_ERR; + case FLUSH_RETRY_EXC_ERR: + return IBV_WC_RETRY_EXC_ERR; + case FLUSH_RNR_RETRY_EXC_ERR: + return IBV_WC_RNR_RETRY_EXC_ERR; case FLUSH_FATAL_ERR: default: return IBV_WC_FATAL_ERR; @@ -636,6 +813,12 @@ static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ib 
case IRDMA_OP_TYPE_BIND_MW: entry->opcode = IBV_WC_BIND_MW; break; + case IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP: + entry->opcode = IBV_WC_COMP_SWAP; + break; + case IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD: + entry->opcode = IBV_WC_FETCH_ADD; + break; case IRDMA_OP_TYPE_INV_STAG: entry->opcode = IBV_WC_LOCAL_INV; break; @@ -644,6 +827,18 @@ static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ib } } +static inline void set_ib_wc_op_rq_gen_3(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) +{ + switch (cur_cqe->op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->opcode = IBV_WC_RECV; + } +} + static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry, bool send_imm_support) { @@ -716,9 +911,12 @@ static void irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *c if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) { set_ib_wc_op_sq(cur_cqe, entry); } else { - set_ib_wc_op_rq(cur_cqe, entry, - qp->qp_caps & IRDMA_SEND_WITH_IMM ? - true : false); + if (qp->uk_attrs->hw_rev <= IRDMA_GEN_2) + set_ib_wc_op_rq(cur_cqe, entry, + qp->qp_caps & IRDMA_SEND_WITH_IMM ? 
+ true : false); + else + set_ib_wc_op_rq_gen_3(cur_cqe, entry); if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) { entry->invalidated_rkey = cur_cqe->inv_stag; @@ -941,27 +1139,11 @@ static void irdma_end_poll(struct ibv_cq_ex *ibvcq_ex) * Get completion timestamp in HCA clock units */ static uint64_t irdma_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex) -{ - struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, - verbs_cq.cq_ex); -#define HCA_CORE_CLOCK_800_MHZ 800 - - return iwucq->cur_cqe.tcp_seq_num_rtt / HCA_CORE_CLOCK_800_MHZ; -} - -/** - * irdma_wc_read_completion_wallclock_ns - Get completion timestamp in ns - * @ibvcq_ex: ibv extended CQ - * - * Get completion timestamp from current completion in wall clock nanoseconds - */ -static uint64_t irdma_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex) { struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); - /* RTT is in usec */ - return iwucq->cur_cqe.tcp_seq_num_rtt * 1000; + return iwucq->cur_cqe.stat.timestamp; } static enum ibv_wc_opcode irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) @@ -982,6 +1164,10 @@ static enum ibv_wc_opcode irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) return IBV_WC_SEND; case IRDMA_OP_TYPE_BIND_MW: return IBV_WC_BIND_MW; + case IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP: + return IBV_WC_COMP_SWAP; + case IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD: + return IBV_WC_FETCH_ADD; case IRDMA_OP_TYPE_REC: return IBV_WC_RECV; case IRDMA_OP_TYPE_REC_IMM: @@ -1104,14 +1290,8 @@ void irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, ibvcq_ex->end_poll = irdma_end_poll; ibvcq_ex->next_poll = irdma_next_poll; - if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) { + if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) ibvcq_ex->read_completion_ts = irdma_wc_read_completion_ts; - iwucq->report_rtt = true; - } - if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { - 
ibvcq_ex->read_completion_wallclock_ns = irdma_wc_read_completion_wallclock_ns; - iwucq->report_rtt = true; - } ibvcq_ex->read_opcode = irdma_wc_read_opcode; ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err; @@ -1358,6 +1538,17 @@ struct ibv_qp *irdma_ucreate_qp(struct ibv_pd *pd, ibv_ctx.context); uk_attrs = &iwvctx->uk_attrs; + if (attr->srq) { + struct irdma_usrq *iwusrq; + struct verbs_srq *vsrq; + + vsrq = container_of(attr->srq, struct verbs_srq, srq); + iwusrq = container_of(vsrq, struct irdma_usrq, v_srq); + attr->cap.max_recv_sge = uk_attrs->max_hw_wq_frags; + attr->cap.max_recv_wr = 1; + info.srq_uk = &iwusrq->srq; + } + if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || @@ -1596,6 +1787,21 @@ int irdma_udestroy_qp(struct ibv_qp *qp) return ret; } +/** + * calc_type2_mw_stag - calculate type 2 MW stag + * @rkey: desired rkey of the MW + * @mw_rkey: type2 memory window rkey + * + * compute type2 memory window stag by taking lower 8 bits + * of the desired rkey and leaving the upper 24 bits of @mw_rkey unchanged + */ +static inline __u32 calc_type2_mw_stag(__u32 rkey, __u32 mw_rkey) +{ + const __u32 mask = 0xff; + + return (rkey & mask) | (mw_rkey & ~mask); +} + /** * irdma_post_send - post send wr for user application * @ib_qp: qp to post wr @@ -1637,6 +1843,29 @@ int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, info.report_rtt = true; switch (ib_wr->opcode) { + case IBV_WR_ATOMIC_CMP_AND_SWP: + info.op_type = IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP; + info.op.atomic_compare_swap.tagged_offset = ib_wr->sg_list[0].addr; + info.op.atomic_compare_swap.remote_tagged_offset = + ib_wr->wr.atomic.remote_addr; + info.op.atomic_compare_swap.swap_data_bytes = ib_wr->wr.atomic.swap; + info.op.atomic_compare_swap.compare_data_bytes = + ib_wr->wr.atomic.compare_add; + info.op.atomic_compare_swap.stag = ib_wr->sg_list[0].lkey; + 
info.op.atomic_compare_swap.remote_stag = ib_wr->wr.atomic.rkey; + err = irdma_uk_atomic_compare_swap(&iwuqp->qp, &info, false); + break; + case IBV_WR_ATOMIC_FETCH_AND_ADD: + info.op_type = IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD; + info.op.atomic_fetch_add.tagged_offset = ib_wr->sg_list[0].addr; + info.op.atomic_fetch_add.remote_tagged_offset = + ib_wr->wr.atomic.remote_addr; + info.op.atomic_fetch_add.fetch_add_data_bytes = + ib_wr->wr.atomic.compare_add; + info.op.atomic_fetch_add.stag = ib_wr->sg_list[0].lkey; + info.op.atomic_fetch_add.remote_stag = ib_wr->wr.atomic.rkey; + err = irdma_uk_atomic_fetch_add(&iwuqp->qp, &info, false); + break; case IBV_WR_SEND_WITH_IMM: if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) { info.imm_data_valid = true; @@ -1721,8 +1950,20 @@ int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, } info.op_type = IRDMA_OP_TYPE_BIND_MW; info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey; - info.op.bind_window.mem_window_type_1 = true; - info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; + if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { + info.op.bind_window.mem_window_type_1 = true; + info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; + } else { + struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); + + if (vmr->access & IBV_ACCESS_ZERO_BASED) { + err = EINVAL; + break; + } + info.op.bind_window.mw_stag = + calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); + ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; + } if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; @@ -1737,6 +1978,8 @@ int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; info.op.bind_window.ena_writes = (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 
1 : 0; + info.op.bind_window.remote_atomics_en = + (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0; err = irdma_uk_mw_bind(&iwuqp->qp, &info, false); break; @@ -1759,7 +2002,8 @@ int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, if (err) *bad_wr = ib_wr; - irdma_uk_qp_post_wr(&iwuqp->qp); + if (!iwuqp->qp.push_db) + irdma_uk_qp_post_wr(&iwuqp->qp); if (reflush) irdma_issue_flush(ib_qp, 1, 0); @@ -1768,6 +2012,51 @@ int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, return err; } +/** + * irdma_upost_srq - post receive wr for user application + * @ib_wr: work request for receive + * @bad_wr: bad wr caused an error + */ +int irdma_upost_srq(struct ibv_srq *ibsrq, struct ibv_recv_wr *ib_wr, + struct ibv_recv_wr **bad_wr) +{ + struct irdma_post_rq_info post_recv = {}; + struct irdma_usrq *iwusrq; + struct irdma_srq_uk *srq; + struct verbs_srq *vsrq; + int err; + + vsrq = container_of(ibsrq, struct verbs_srq, srq); + iwusrq = container_of(vsrq, struct irdma_usrq, v_srq); + srq = &iwusrq->srq; + + err = pthread_spin_lock(&iwusrq->lock); + if (err) + return err; + + while (ib_wr) { + if (ib_wr->num_sge > srq->max_srq_frag_cnt) { + *bad_wr = ib_wr; + err = EINVAL; + goto error; + } + post_recv.num_sges = ib_wr->num_sge; + post_recv.wr_id = ib_wr->wr_id; + post_recv.sg_list = ib_wr->sg_list; + err = irdma_uk_srq_post_receive(srq, &post_recv); + if (err) { + *bad_wr = ib_wr; + goto error; + } + + ib_wr = ib_wr->next; + } +error: + pthread_spin_unlock(&iwusrq->lock); + + return err; +} + /** * irdma_post_recv - post receive wr for user application * @ib_wr: work request for receive @@ -1782,6 +2071,10 @@ int irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, int err; iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); + if (iwuqp->qp.srq_uk) { + *bad_wr = ib_wr; + return EINVAL; + } err = pthread_spin_lock(&iwuqp->lock); if (err) @@ -1918,6 +2211,7 @@ int irdma_uresize_cq(struct ibv_cq *cq, 
int cqe) __u32 cq_pages; int cqe_needed; int ret = 0; + bool cqe_64byte_ena; iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); iwvctx = container_of(cq->context, struct irdma_uvcontext, @@ -1930,8 +2224,10 @@ int irdma_uresize_cq(struct ibv_cq *cq, int cqe) if (cqe > IRDMA_MAX_CQ_SIZE) return EINVAL; + cqe_64byte_ena = (uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE) ? + true : false; cqe_needed = cqe + 1; - if (uk_attrs->hw_rev > IRDMA_GEN_1) + if (!cqe_64byte_ena && uk_attrs->hw_rev > IRDMA_GEN_1) cqe_needed *= 2; if (cqe_needed < IRDMA_U_MINCQ_SIZE) @@ -1940,7 +2236,7 @@ int irdma_uresize_cq(struct ibv_cq *cq, int cqe) if (cqe_needed == iwucq->cq.cq_size) return 0; - cq_size = get_cq_total_bytes(cqe_needed); + cq_size = get_cq_total_bytes(cqe_needed, cqe_64byte_ena); cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT; cq_base = irdma_calloc_hw_buf(cq_size); if (!cq_base) @@ -1974,6 +2270,7 @@ int irdma_uresize_cq(struct ibv_cq *cq, int cqe) goto err_resize; memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq)); + cq_buf->buf_size = iwucq->buf_size; cq_buf->vmr = iwucq->vmr; iwucq->vmr = new_mr; irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);