whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit ad0371482b1ed599a0e9f8cce50e8499c7c1497b
parent c127e59bee3e26d8059683dcb919006b6a76e90c
Author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date:   Thu, 27 Sep 2018 21:53:55 +0200

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Jason writes:
  "Second RDMA rc pull request

   - Fix a long standing race bug when destroying comp_event file descriptors

   - srp, hfi1, bnxt_re: Various driver crashes from missing validation
     and other cases

   - Fixes for regressions in patches merged this window in the gid
     cache, devx, ucma and uapi."

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/core: Set right entry state before releasing reference
  IB/mlx5: Destroy the DEVX object upon error flow
  IB/uverbs: Free uapi on destroy
  RDMA/bnxt_re: Fix system crash during RDMA resource initialization
  IB/hfi1: Fix destroy_qp hang after a link down
  IB/hfi1: Fix context recovery when PBC has an UnsupportedVL
  IB/hfi1: Invalid user input can result in crash
  IB/hfi1: Fix SL array bounds check
  RDMA/uverbs: Fix validity check for modify QP
  IB/srp: Avoid that sg_reset -d ${srp_device} triggers an infinite loop
  ucma: fix a use-after-free in ucma_resolve_ip()
  RDMA/uverbs: Atomically flush and mark closed the comp event queue
  cxgb4: fix abort_req_rss6 struct

Diffstat:
Mdrivers/infiniband/core/cache.c | 68++++++++++++++++++++++++++++++++++----------------------------------
Mdrivers/infiniband/core/ucma.c | 2++
Mdrivers/infiniband/core/uverbs_cmd.c | 68+++++++++++++++++++++++++++++++++++++++++++++-----------------------
Mdrivers/infiniband/core/uverbs_main.c | 1+
Mdrivers/infiniband/core/uverbs_uapi.c | 1+
Mdrivers/infiniband/hw/bnxt_re/main.c | 93++++++++++++++++++++++++++++++++-----------------------------------------------
Mdrivers/infiniband/hw/hfi1/chip.c | 6+++++-
Mdrivers/infiniband/hw/hfi1/pio.c | 51+++++++++++++++++++++++++++++++++++++++++----------
Mdrivers/infiniband/hw/hfi1/pio.h | 2++
Mdrivers/infiniband/hw/hfi1/user_sdma.c | 2+-
Mdrivers/infiniband/hw/hfi1/verbs.c | 8+++++++-
Mdrivers/infiniband/hw/mlx5/devx.c | 5++++-
Mdrivers/infiniband/ulp/srp/ib_srp.c | 6+++---
Mdrivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 1-
14 files changed, 184 insertions(+), 130 deletions(-)

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c @@ -338,6 +338,39 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) } /** + * del_gid - Delete GID table entry + * + * @ib_dev: IB device whose GID entry to be deleted + * @port: Port number of the IB device + * @table: GID table of the IB device for a port + * @ix: GID entry index to delete + * + */ +static void del_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix) +{ + struct ib_gid_table_entry *entry; + + lockdep_assert_held(&table->lock); + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + ib_dev->name, port, ix, + table->data_vec[ix]->attr.gid.raw); + + write_lock_irq(&table->rwlock); + entry = table->data_vec[ix]; + entry->state = GID_TABLE_ENTRY_PENDING_DEL; + /* + * For non RoCE protocol, GID entry slot is ready to use. + */ + if (!rdma_protocol_roce(ib_dev, port)) + table->data_vec[ix] = NULL; + write_unlock_irq(&table->rwlock); + + put_gid_entry_locked(entry); +} + +/** * add_modify_gid - Add or modify GID table entry * * @table: GID table in which GID to be added or modified @@ -358,7 +391,7 @@ static int add_modify_gid(struct ib_gid_table *table, * this index. */ if (is_gid_entry_valid(table->data_vec[attr->index])) - put_gid_entry(table->data_vec[attr->index]); + del_gid(attr->device, attr->port_num, table, attr->index); /* * Some HCA's report multiple GID entries with only one valid GID, and @@ -386,39 +419,6 @@ done: return ret; } -/** - * del_gid - Delete GID table entry - * - * @ib_dev: IB device whose GID entry to be deleted - * @port: Port number of the IB device - * @table: GID table of the IB device for a port - * @ix: GID entry index to delete - * - */ -static void del_gid(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table, int ix) -{ - struct ib_gid_table_entry *entry; - - lockdep_assert_held(&table->lock); - - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - ib_dev->name, port, ix, - table->data_vec[ix]->attr.gid.raw); - - write_lock_irq(&table->rwlock); - entry = table->data_vec[ix]; - entry->state = GID_TABLE_ENTRY_PENDING_DEL; - /* - * For non RoCE protocol, GID entry slot is ready to use. - */ - if (!rdma_protocol_roce(ib_dev, port)) - table->data_vec[ix] = NULL; - write_unlock_irq(&table->rwlock); - - put_gid_entry_locked(entry); -} - /* rwlock should be read locked, or lock should be held */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c @@ -1759,6 +1759,8 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c @@ -2027,33 +2027,55 @@ static int modify_qp(struct ib_uverbs_file *file, if ((cmd->base.attr_mask & IB_QP_CUR_STATE && cmd->base.cur_qp_state > IB_QPS_ERR) || - cmd->base.qp_state > IB_QPS_ERR) { + (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state > IB_QPS_ERR)) { ret = -EINVAL; goto release_qp; } - attr->qp_state = cmd->base.qp_state; - attr->cur_qp_state = cmd->base.cur_qp_state; - attr->path_mtu = cmd->base.path_mtu; - attr->path_mig_state = cmd->base.path_mig_state; - attr->qkey = cmd->base.qkey; - attr->rq_psn = cmd->base.rq_psn; - attr->sq_psn = cmd->base.sq_psn; - attr->dest_qp_num = cmd->base.dest_qp_num; - attr->qp_access_flags = cmd->base.qp_access_flags; - attr->pkey_index = cmd->base.pkey_index; - attr->alt_pkey_index = cmd->base.alt_pkey_index; - attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; - attr->max_rd_atomic = cmd->base.max_rd_atomic; - attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; - attr->min_rnr_timer = cmd->base.min_rnr_timer; - attr->port_num = cmd->base.port_num; - attr->timeout = cmd->base.timeout; - attr->retry_cnt = cmd->base.retry_cnt; - attr->rnr_retry = cmd->base.rnr_retry; - attr->alt_port_num = cmd->base.alt_port_num; - attr->alt_timeout = cmd->base.alt_timeout; - attr->rate_limit = cmd->rate_limit; + if (cmd->base.attr_mask & IB_QP_STATE) + attr->qp_state = cmd->base.qp_state; + if (cmd->base.attr_mask & IB_QP_CUR_STATE) + attr->cur_qp_state = cmd->base.cur_qp_state; + if (cmd->base.attr_mask & IB_QP_PATH_MTU) + attr->path_mtu = cmd->base.path_mtu; + if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE) + attr->path_mig_state = cmd->base.path_mig_state; + if (cmd->base.attr_mask & IB_QP_QKEY) + attr->qkey = cmd->base.qkey; + if (cmd->base.attr_mask & IB_QP_RQ_PSN) + attr->rq_psn = cmd->base.rq_psn; + if (cmd->base.attr_mask & IB_QP_SQ_PSN) + attr->sq_psn = cmd->base.sq_psn; + if (cmd->base.attr_mask & IB_QP_DEST_QPN) + attr->dest_qp_num = cmd->base.dest_qp_num; + if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS) + attr->qp_access_flags = cmd->base.qp_access_flags; + if (cmd->base.attr_mask & IB_QP_PKEY_INDEX) + attr->pkey_index = cmd->base.pkey_index; + if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + attr->max_rd_atomic = cmd->base.max_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER) + attr->min_rnr_timer = cmd->base.min_rnr_timer; + if (cmd->base.attr_mask & IB_QP_PORT) + attr->port_num = cmd->base.port_num; + if (cmd->base.attr_mask & IB_QP_TIMEOUT) + attr->timeout = cmd->base.timeout; + if (cmd->base.attr_mask & IB_QP_RETRY_CNT) + attr->retry_cnt = cmd->base.retry_cnt; + if (cmd->base.attr_mask & IB_QP_RNR_RETRY) + attr->rnr_retry = cmd->base.rnr_retry; + if (cmd->base.attr_mask & IB_QP_ALT_PATH) { + attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + } + if (cmd->base.attr_mask & IB_QP_RATE_LIMIT) + attr->rate_limit = cmd->rate_limit; if (cmd->base.attr_mask & IB_QP_AV) copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c @@ -440,6 +440,7 @@ static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) list_del(&entry->obj_list); kfree(entry); } + file->ev_queue.is_closed = 1; spin_unlock_irq(&file->ev_queue.lock); uverbs_close_fd(filp); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c @@ -248,6 +248,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi) kfree(rcu_dereference_protected(*slot, true)); radix_tree_iter_delete(&uapi->radix, &iter, slot); } + kfree(uapi); } struct uverbs_api *uverbs_alloc_api( diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c @@ -78,7 +78,7 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); /* Mutex to protect the list of bnxt_re devices added */ static DEFINE_MUTEX(bnxt_re_dev_lock); static struct workqueue_struct *bnxt_re_wq; -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait); +static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev); /* SR-IOV helper functions */ @@ -182,7 +182,7 @@ static void bnxt_re_shutdown(void *p) if (!rdev) return; - bnxt_re_ib_unreg(rdev, false); + bnxt_re_ib_unreg(rdev); } static void bnxt_re_stop_irq(void *handle) @@ -251,7 +251,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { /* Driver registration routines used to let the networking driver (bnxt_en) * to know that the RoCE driver is now installed */ -static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait) +static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; int rc; @@ -260,14 +260,9 @@ static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait) return -EINVAL; en_dev = rdev->en_dev; - /* Acquire rtnl lock if it is not invokded from netdev event */ - if (lock_wait) - rtnl_lock(); rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev, BNXT_ROCE_ULP); - if (lock_wait) - rtnl_unlock(); return rc; } @@ -281,14 +276,12 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) en_dev = rdev->en_dev; - rtnl_lock(); rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP, &bnxt_re_ulp_ops, rdev); - rtnl_unlock(); return rc; } -static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) +static int bnxt_re_free_msix(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; int rc; @@ -298,13 +291,9 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) en_dev = rdev->en_dev; - if (lock_wait) - rtnl_lock(); rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP); - if (lock_wait) - rtnl_unlock(); return rc; } @@ -320,7 +309,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus()); - rtnl_lock(); num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, rdev->msix_entries, num_msix_want); @@ -335,7 +323,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) } rdev->num_msix = num_msix_got; done: - rtnl_unlock(); return rc; } @@ -358,24 +345,18 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, fw_msg->timeout = timeout; } -static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, - bool lock_wait) +static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ring_free_input req = {0}; struct hwrm_ring_free_output resp; struct bnxt_fw_msg fw_msg; - bool do_unlock = false; int rc = -EINVAL; if (!en_dev) return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - if (lock_wait) { - rtnl_lock(); - do_unlock = true; - } bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; @@ -386,8 +367,6 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, if (rc) dev_err(rdev_to_dev(rdev), "Failed to free HW ring:%d :%#x", req.ring_id, rc); - if (do_unlock) - rtnl_unlock(); return rc; } @@ -405,7 +384,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - rtnl_lock(); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); req.enables = 0; req.page_tbl_addr = cpu_to_le64(dma_arr[0]); @@ -426,27 +404,21 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, if (!rc) *fw_ring_id = le16_to_cpu(resp.ring_id); - rtnl_unlock(); return rc; } static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, - u32 fw_stats_ctx_id, bool lock_wait) + u32 fw_stats_ctx_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_stat_ctx_free_input req = {0}; struct bnxt_fw_msg fw_msg; - bool do_unlock = false; int rc = -EINVAL; if (!en_dev) return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - if (lock_wait) { - rtnl_lock(); - do_unlock = true; - } bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); @@ -457,8 +429,6 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, dev_err(rdev_to_dev(rdev), "Failed to free HW stats context %#x", rc); - if (do_unlock) - rtnl_unlock(); return rc; } @@ -478,7 +448,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - rtnl_lock(); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); @@ -490,7 +459,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, if (!rc) *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id); - rtnl_unlock(); return rc; } @@ -929,19 +897,19 @@ fail: return rc; } -static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev, bool lock_wait) +static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) { int i; for (i = 0; i < rdev->num_msix - 1; i++) { - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait); + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); bnxt_qplib_free_nq(&rdev->nq[i]); } } -static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) +static void bnxt_re_free_res(struct bnxt_re_dev *rdev) { - bnxt_re_free_nq_res(rdev, lock_wait); + bnxt_re_free_nq_res(rdev); if (rdev->qplib_res.dpi_tbl.max) { bnxt_qplib_dealloc_dpi(&rdev->qplib_res, @@ -1219,7 +1187,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait) +static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { int i, rc; @@ -1234,28 +1202,27 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait) cancel_delayed_work(&rdev->worker); bnxt_re_cleanup_res(rdev); - bnxt_re_free_res(rdev, lock_wait); + bnxt_re_free_res(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to deinitialize RCFW: %#x", rc); - bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, - lock_wait); + bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) { - rc = bnxt_re_free_msix(rdev, lock_wait); + rc = bnxt_re_free_msix(rdev); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to free MSI-X vectors: %#x", rc); } if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { - rc = bnxt_re_unregister_netdev(rdev, lock_wait); + rc = bnxt_re_unregister_netdev(rdev); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to unregister with netdev: %#x", rc); @@ -1276,6 +1243,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) { int i, j, rc; + bool locked; + + /* Acquire rtnl lock through out this function */ + rtnl_lock(); + locked = true; + /* Registered a new RoCE device instance to netdev */ rc = bnxt_re_register_netdev(rdev); if (rc) { @@ -1374,12 +1347,16 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } + rtnl_unlock(); + locked = false; + /* Register ib dev */ rc = bnxt_re_register_ib(rdev); if (rc) { pr_err("Failed to register with IB: %#x\n", rc); goto fail; } + set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); dev_info(rdev_to_dev(rdev), "Device registered successfully"); for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) { rc = device_create_file(&rdev->ibdev.dev, @@ -1395,7 +1372,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) goto fail; } } - set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, &rdev->active_width); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); @@ -1404,17 +1380,21 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) return 0; free_sctx: - bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true); + bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); free_ctx: bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); disable_rcfw: bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); free_ring: - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: - bnxt_re_ib_unreg(rdev, true); + if (!locked) + rtnl_lock(); + bnxt_re_ib_unreg(rdev); + rtnl_unlock(); + return rc; } @@ -1567,7 +1547,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, */ if (atomic_read(&rdev->sched_count) > 0) goto exit; - bnxt_re_ib_unreg(rdev, false); + bnxt_re_ib_unreg(rdev); bnxt_re_remove_one(rdev); bnxt_re_dev_unreg(rdev); break; @@ -1646,7 +1626,10 @@ static void __exit bnxt_re_mod_exit(void) */ flush_workqueue(bnxt_re_wq); bnxt_re_dev_stop(rdev); - bnxt_re_ib_unreg(rdev, true); + /* Acquire the rtnl_lock as the L2 resources are freed here */ + rtnl_lock(); + bnxt_re_ib_unreg(rdev); + rtnl_unlock(); bnxt_re_remove_one(rdev); bnxt_re_dev_unreg(rdev); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c @@ -6733,6 +6733,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) struct hfi1_devdata *dd = ppd->dd; struct send_context *sc; int i; + int sc_flags; if (flags & FREEZE_SELF) write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK); @@ -6743,11 +6744,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) /* notify all SDMA engines that they are going into a freeze */ sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN)); + sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ? + SCF_LINK_DOWN : 0); /* do halt pre-handling on all enabled send contexts */ for (i = 0; i < dd->num_send_contexts; i++) { sc = dd->send_contexts[i].sc; if (sc && (sc->flags & SCF_ENABLED)) - sc_stop(sc, SCF_FROZEN | SCF_HALTED); + sc_stop(sc, sc_flags); } /* Send context are frozen. Notify user space */ @@ -10674,6 +10677,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + pio_kernel_linkup(dd); /* * After link up, a new link width will have been set. diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c @@ -86,6 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) unsigned long flags; int write = 1; /* write sendctrl back */ int flush = 0; /* re-read sendctrl to make sure it is flushed */ + int i; spin_lock_irqsave(&dd->sendctrl_lock, flags); @@ -95,9 +96,13 @@ void pio_send_control(struct hfi1_devdata *dd, int op) reg |= SEND_CTRL_SEND_ENABLE_SMASK; /* Fall through */ case PSC_DATA_VL_ENABLE: + mask = 0; + for (i = 0; i < ARRAY_SIZE(dd->vld); i++) + if (!dd->vld[i].mtu) + mask |= BIT_ULL(i); /* Disallow sending on VLs not enabled */ - mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) << - SEND_CTRL_UNSUPPORTED_VL_SHIFT; + mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << + SEND_CTRL_UNSUPPORTED_VL_SHIFT; reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; break; case PSC_GLOBAL_DISABLE: @@ -921,20 +926,18 @@ void sc_free(struct send_context *sc) void sc_disable(struct send_context *sc) { u64 reg; - unsigned long flags; struct pio_buf *pbuf; if (!sc) return; /* do all steps, even if already disabled */ - spin_lock_irqsave(&sc->alloc_lock, flags); + spin_lock_irq(&sc->alloc_lock); reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); reg &= ~SC(CTRL_CTXT_ENABLE_SMASK); sc->flags &= ~SCF_ENABLED; sc_wait_for_packet_egress(sc, 1); write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); - spin_unlock_irqrestore(&sc->alloc_lock, flags); /* * Flush any waiters. Once the context is disabled, @@ -944,7 +947,7 @@ void sc_disable(struct send_context *sc) * proceed with the flush. */ udelay(1); - spin_lock_irqsave(&sc->release_lock, flags); + spin_lock(&sc->release_lock); if (sc->sr) { /* this context has a shadow ring */ while (sc->sr_tail != sc->sr_head) { pbuf = &sc->sr[sc->sr_tail].pbuf; @@ -955,7 +958,8 @@ void sc_disable(struct send_context *sc) sc->sr_tail = 0; } } - spin_unlock_irqrestore(&sc->release_lock, flags); + spin_unlock(&sc->release_lock); + spin_unlock_irq(&sc->alloc_lock); } /* return SendEgressCtxtStatus.PacketOccupancy */ @@ -1178,11 +1182,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd) sc = dd->send_contexts[i].sc; if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) continue; + if (sc->flags & SCF_LINK_DOWN) + continue; sc_enable(sc); /* will clear the sc frozen flag */ } } +/** + * pio_kernel_linkup() - Re-enable send contexts after linkup event + * @dd: valid devive data + * + * When the link goes down, the freeze path is taken. However, a link down + * event is different from a freeze because if the send context is re-enabled + * whowever is sending data will start sending data again, which will hang + * any QP that is sending data. + * + * The freeze path now looks at the type of event that occurs and takes this + * path for link down event. + */ +void pio_kernel_linkup(struct hfi1_devdata *dd) +{ + struct send_context *sc; + int i; + + for (i = 0; i < dd->num_send_contexts; i++) { + sc = dd->send_contexts[i].sc; + if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER) + continue; + + sc_enable(sc); /* will clear the sc link down flag */ + } +} + /* * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear. * Returns: @@ -1382,11 +1414,10 @@ void sc_stop(struct send_context *sc, int flag) { unsigned long flags; - /* mark the context */ - sc->flags |= flag; - /* stop buffer allocations */ spin_lock_irqsave(&sc->alloc_lock, flags); + /* mark the context */ + sc->flags |= flag; sc->flags &= ~SCF_ENABLED; spin_unlock_irqrestore(&sc->alloc_lock, flags); wake_up(&sc->halt_wait); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h @@ -139,6 +139,7 @@ struct send_context { #define SCF_IN_FREE 0x02 #define SCF_HALTED 0x04 #define SCF_FROZEN 0x08 +#define SCF_LINK_DOWN 0x10 struct send_context_info { struct send_context *sc; /* allocated working context */ @@ -306,6 +307,7 @@ void set_pio_integrity(struct send_context *sc); void pio_reset_all(struct hfi1_devdata *dd); void pio_freeze(struct hfi1_devdata *dd); void pio_kernel_unfreeze(struct hfi1_devdata *dd); +void pio_kernel_linkup(struct hfi1_devdata *dd); /* global PIO send control operations */ #define PSC_GLOBAL_ENABLE 0 diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -828,7 +828,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { if (++req->iov_idx == req->data_iovs) { ret = -EFAULT; - goto free_txreq; + goto free_tx; } iovec = &req->iovs[req->iov_idx]; WARN_ON(iovec->offset); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c @@ -1582,6 +1582,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) struct hfi1_pportdata *ppd; struct hfi1_devdata *dd; u8 sc5; + u8 sl; if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) && !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) @@ -1590,8 +1591,13 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) /* test the mapping for validity */ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; dd = dd_from_ppd(ppd); + + sl = rdma_ah_get_sl(ah_attr); + if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) + return -EINVAL; + + sc5 = ibp->sl_to_sc[sl]; if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) return -EINVAL; return 0; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c @@ -723,6 +723,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj; int err; @@ -754,10 +755,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto obj_free; + goto obj_destroy; return 0; +obj_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); return err; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2951,7 +2951,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i; + int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -2965,8 +2965,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - for (i = 0; i < target->req_ring_size; ++i) { - struct srp_request *req = &ch->req_ring[i]; + for (j = 0; j < target->req_ring_size; ++j) { + struct srp_request *req = &ch->req_ring[j]; srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -753,7 +753,6 @@ struct cpl_abort_req_rss { }; struct cpl_abort_req_rss6 { - WR_HDR; union opcode_tid ot; __be32 srqidx_status; };