whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 582549e3fbe137eb6ce9be591aca25c2222a36b4
parent ed79cc87302bf7fbc87f05d655b998f866b4fed8
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed, 10 Apr 2019 09:39:04 -1000

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Several driver bug fixes posted in the last several weeks

   - Several bug fixes for the hfi1 driver 'TID RDMA' functionality
     merged into 5.1. Since TID RDMA is on by default these all seem to
     be regressions.

   - Wrong software permission checks on memory in mlx5

   - Memory leak in vmw_pvrdma during driver remove

   - Several bug fixes for hns driver features merged into 5.1"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/hfi1: Do not flush send queue in the TID RDMA second leg
  RDMA/hns: Bugfix for SCC hem free
  RDMA/hns: Fix bug that caused srq creation to fail
  RDMA/vmw_pvrdma: Fix memory leak on pvrdma_pci_remove
  IB/mlx5: Reset access mask when looping inside page fault handler
  IB/hfi1: Fix the allocation of RSM table
  IB/hfi1: Eliminate opcode tests on mr deref
  IB/hfi1: Clear the IOWAIT pending bits when QP is put into error state
  IB/hfi1: Failed to drain send queue when QP is put into error state

Diffstat:
Mdrivers/infiniband/hw/hfi1/chip.c | 26+++++++++++++++++++-------
Mdrivers/infiniband/hw/hfi1/qp.c | 4+++-
Mdrivers/infiniband/hw/hfi1/rc.c | 4++--
Mdrivers/infiniband/hw/hfi1/tid_rdma.c | 31++++++++-----------------------
Mdrivers/infiniband/hw/hns/hns_roce_hem.c | 6++++--
Mdrivers/infiniband/hw/hns/hns_roce_mr.c | 4++--
Mdrivers/infiniband/hw/hns/hns_roce_qp.c | 3---
Mdrivers/infiniband/hw/mlx5/odp.c | 3++-
Mdrivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2++
9 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c @@ -13232,7 +13232,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int total_contexts; int ret; unsigned ngroups; - int qos_rmt_count; + int rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; u32 send_contexts = chip_send_contexts(dd); @@ -13294,10 +13294,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd) n_usr_ctxts = rcv_contexts - total_contexts; } - /* each user context requires an entry in the RMT */ - qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); - if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { - user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; + /* + * The RMT entries are currently allocated as shown below: + * 1. QOS (0 to 128 entries); + * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 3. VNIC (num_vnic_contexts). + * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * entries of RMT because both VNIC and PSM could allocate any receive + * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, + * and PSM FECN must reserve an RMT entry for each possible PSM receive + * context. + */ + rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { + user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, "RMT size is reducing the number of user receive contexts from %u to %d\n", n_usr_ctxts, @@ -14285,9 +14295,11 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, u64 reg; int i, idx, regoff, regidx; u8 offset; + u32 total_cnt; /* there needs to be enough room in the map table */ - if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) { + total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; + if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); return; } @@ -14341,7 +14353,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* add rule 1 */ add_rsm_rule(dd, RSM_INS_FECN, &rrd); - rmt->used += dd->num_user_contexts; + rmt->used += total_cnt; } /* Initialize RSM for VNIC */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c @@ -898,7 +898,9 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) { - qp->s_flags &= ~RVT_S_ANY_WAIT_IO; + qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID); list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c @@ -3088,7 +3088,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } @@ -3166,7 +3166,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -5017,24 +5017,14 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) make_tid_rdma_ack(qp, ohdr, ps)) return 1; - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { - if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == READ_ONCE(qp->s_head)) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (iowait_sdma_pending(&priv->s_iowait)) { - qp->s_flags |= RVT_S_WAIT_DMA; - goto bail; - } - clear_ahg(qp); - wqe = rvt_get_swqe_ptr(qp, qp->s_last); - hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); - /* will get called again */ - goto done_free_tx; - } + /* + * Bail out if we can't send data. + * Be reminded that this check must been done after the call to + * make_tid_rdma_ack() because the responding QP could be in + * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA. + */ + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) + goto bail; if (priv->s_flags & RVT_S_WAIT_ACK) goto bail; @@ -5144,11 +5134,6 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2, middle, ps); return 1; -done_free_tx: - hfi1_put_txreq(ps->s_txreq); - ps->s_txreq = NULL; - return 1; - bail: hfi1_put_txreq(ps->s_txreq); bail_no_tx: diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -792,6 +792,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk; dma_offset = offset = idx_offset * table->obj_size; } else { + u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */ + hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); /* mtt mhop */ i = mhop.l0_idx; @@ -803,8 +805,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, hem_idx = i; hem = table->hem[hem_idx]; - dma_offset = offset = (obj & (table->num_obj - 1)) * - table->obj_size % mhop.bt_chunk_size; + dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size % + mhop.bt_chunk_size; if (mhop.hop_num == 2) dma_offset = offset = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -746,7 +746,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table; dma_addr_t dma_handle; __le64 *mtts; - u32 s = start_index * sizeof(u64); u32 bt_page_size; u32 i; @@ -780,7 +779,8 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, return -EINVAL; mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, + mtt->first_seg + + start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, &dma_handle); if (!mtts) return -ENOMEM; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -274,9 +274,6 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) wait_for_completion(&hr_qp->free); if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { - if (hr_dev->caps.sccc_entry_sz) - hns_roce_table_put(hr_dev, &qp_table->sccc_table, - hr_qp->qpn); if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c @@ -585,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; - u64 access_mask = ODP_READ_ALLOWED_BIT; + u64 access_mask; u64 start_idx, page_mask; struct ib_umem_odp *odp; size_t size; @@ -607,6 +607,7 @@ next_mr: page_shift = mr->umem->page_shift; page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + access_mask = ODP_READ_ALLOWED_BIT; if (prefetch && !downgrade && !mr->umem->writable) { /* prefetch with write-access must diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1131,6 +1131,8 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); pvrdma_page_dir_cleanup(dev, &dev->async_pdir); pvrdma_free_slots(dev); + dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, + dev->dsrbase); iounmap(dev->regs); kfree(dev->sgid_tbl);