whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 5eeb63359b1ec4914d9898e24aa357ff930e6ee1
parent 9ace868a17d875ee18f9127d0f456be6dd88fa47
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri,  1 Feb 2019 10:39:24 -0800

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Still not much going on, the usual set of oops and driver fixes this
  time:

   - Fix two uapi breakage regressions in mlx5 drivers

   - Various oops fixes in hfi1, mlx4, umem, uverbs, and ipoib

   - A protocol bug fix for hfi1 preventing it from implementing the
     verbs API properly, and a compatability fix for EXEC STACK user
     programs

   - Fix missed refcounting in the 'advise_mr' patches merged this
     cycle.

   - Fix wrong use of the uABI in the hns SRQ patches merged this cycle"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/uverbs: Fix OOPs in uverbs_user_mmap_disassociate
  IB/ipoib: Fix for use-after-free in ipoib_cm_tx_start
  IB/uverbs: Fix ioctl query port to consider device disassociation
  RDMA/mlx5: Fix flow creation on representors
  IB/uverbs: Fix OOPs upon device disassociation
  RDMA/umem: Add missing initialization of owning_mm
  RDMA/hns: Update the kernel header file of hns
  IB/mlx5: Fix how advise_mr() launches async work
  RDMA/device: Expose ib_device_try_get(()
  IB/hfi1: Add limit test for RC/UC send via loopback
  IB/hfi1: Remove overly conservative VM_EXEC flag check
  IB/{hfi1, qib}: Fix WC.byte_len calculation for UD_SEND_WITH_IMM
  IB/mlx4: Fix using wrong function to destroy sqp AHs under SRIOV
  RDMA/mlx5: Fix check for supported user flags when creating a QP

Diffstat:
Mdrivers/infiniband/core/core_priv.h | 1-
Mdrivers/infiniband/core/device.c | 13++++++++++---
Mdrivers/infiniband/core/umem_odp.c | 3+++
Mdrivers/infiniband/core/uverbs_main.c | 25++++++++++++++++---------
Mdrivers/infiniband/core/uverbs_std_types_device.c | 8+++++++-
Mdrivers/infiniband/hw/hfi1/file_ops.c | 2+-
Mdrivers/infiniband/hw/hfi1/ud.c | 1-
Mdrivers/infiniband/hw/hns/hns_roce_srq.c | 10++++++++--
Mdrivers/infiniband/hw/mlx4/mad.c | 6+++---
Mdrivers/infiniband/hw/mlx5/flow.c | 3+--
Mdrivers/infiniband/hw/mlx5/odp.c | 10+++++-----
Mdrivers/infiniband/hw/mlx5/qp.c | 16+++++++++-------
Mdrivers/infiniband/hw/qib/qib_ud.c | 1-
Mdrivers/infiniband/sw/rdmavt/qp.c | 7++++++-
Mdrivers/infiniband/ulp/ipoib/ipoib.h | 1-
Mdrivers/infiniband/ulp/ipoib/ipoib_cm.c | 3+--
Minclude/rdma/ib_verbs.h | 24++++++++++++++++++++++--
Minclude/uapi/rdma/hns-abi.h | 5+++++
18 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h @@ -267,7 +267,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, #endif struct ib_device *ib_device_get_by_index(u32 ifindex); -void ib_device_put(struct ib_device *device); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c @@ -156,19 +156,26 @@ struct ib_device *ib_device_get_by_index(u32 index) down_read(&lists_rwsem); device = __ib_device_get_by_index(index); if (device) { - /* Do not return a device if unregistration has started. */ - if (!refcount_inc_not_zero(&device->refcount)) + if (!ib_device_try_get(device)) device = NULL; } up_read(&lists_rwsem); return device; } +/** + * ib_device_put - Release IB device reference + * @device: device whose reference to be released + * + * ib_device_put() releases reference to the IB device to allow it to be + * unregistered and eventually free. + */ void ib_device_put(struct ib_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->unreg_completion); } +EXPORT_SYMBOL(ib_device_put); static struct ib_device *__ib_device_get_by_name(const char *name) { @@ -303,7 +310,6 @@ struct ib_device *ib_alloc_device(size_t size) rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); - refcount_set(&device->refcount, 1); init_completion(&device->unreg_completion); return device; @@ -620,6 +626,7 @@ int ib_register_device(struct ib_device *device, const char *name, goto cg_cleanup; } + refcount_set(&device->refcount, 1); device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c @@ -352,6 +352,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, umem->writable = 1; umem->is_odp = 1; odp_data->per_mm = per_mm; + umem->owning_mm = per_mm->mm; + mmgrab(umem->owning_mm); mutex_init(&odp_data->umem_mutex); init_completion(&odp_data->notifier_completion); @@ -384,6 +386,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, out_page_list: vfree(odp_data->page_list); out_odp_data: + mmdrop(umem->owning_mm); kfree(odp_data); return ERR_PTR(ret); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c @@ -204,6 +204,9 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); + if (file->async_file) + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); put_device(&file->device->dev); kfree(file); } @@ -964,11 +967,19 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) /* Get an arbitrary mm pointer that hasn't been cleaned yet */ mutex_lock(&ufile->umap_lock); - if (!list_empty(&ufile->umaps)) { - mm = list_first_entry(&ufile->umaps, - struct rdma_umap_priv, list) - ->vma->vm_mm; - mmget(mm); + while (!list_empty(&ufile->umaps)) { + int ret; + + priv = list_first_entry(&ufile->umaps, + struct rdma_umap_priv, list); + mm = priv->vma->vm_mm; + ret = mmget_not_zero(mm); + if (!ret) { + list_del_init(&priv->list); + mm = NULL; + continue; + } + break; } mutex_unlock(&ufile->umap_lock); if (!mm) @@ -1096,10 +1107,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); - if (file->async_file) - kref_put(&file->async_file->ref, - ib_uverbs_release_async_event_file); - kref_put(&file->ref, ib_uverbs_release_file); return 0; diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c @@ -168,12 +168,18 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr, static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( struct uverbs_attr_bundle *attrs) { - struct ib_device *ib_dev = attrs->ufile->device->ib_dev; + struct ib_device *ib_dev; struct ib_port_attr attr = {}; struct ib_uverbs_query_port_resp_ex resp = {}; + struct ib_ucontext *ucontext; int ret; u8 port_num; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ if (!ib_dev->ops.query_port) return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c @@ -488,7 +488,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: - if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + if (flags & VM_WRITE) { ret = -EPERM; goto done; } diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c @@ -987,7 +987,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -210,6 +210,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq; int srq_desc_size; int srq_buf_size; @@ -378,16 +379,21 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->event = hns_roce_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->srqn; + resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { ret = -EFAULT; - goto err_wrid; + goto err_srqc_alloc; } } return &srq->ibsrq; +err_srqc_alloc: + hns_roce_srq_free(hr_dev, srq); + err_wrid: kvfree(srq->wrid); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); + mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1902,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; @@ -1931,7 +1931,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c @@ -630,8 +630,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, - &mlx5_ib_fs, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + &mlx5_ib_fs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_actions), {}, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c @@ -1595,10 +1595,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) struct prefetch_mr_work *w = container_of(work, struct prefetch_mr_work, work); - if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + if (ib_device_try_get(&w->dev->ib_dev)) { mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, w->num_sge); - + ib_device_put(&w->dev->ib_dev); + } + put_device(&w->dev->ib_dev.dev); kfree(w); } @@ -1617,15 +1619,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, num_sge); - if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) - return -ENODEV; - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + get_device(&dev->ib_dev.dev); work->dev = dev; work->pf_flags = pf_flags; work->num_sge = num_sge; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c @@ -1912,14 +1912,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (!check_flags_mask(ucmd.flags, + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_BFREG_INDEX | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | + MLX5_QP_FLAG_SCATTER_CQE | MLX5_QP_FLAG_SIGNATURE | - MLX5_QP_FLAG_SCATTER_CQE | - MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_BFREG_INDEX | - MLX5_QP_FLAG_TYPE_DCT | - MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE | - MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TYPE_DCI | + MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c @@ -512,7 +512,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c @@ -2910,6 +2910,8 @@ send: goto op_err; if (!ret) goto rnr_nak; + if (wqe->length > qp->r_len) + goto inv_err; break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -3078,7 +3080,10 @@ op_err: goto err; inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; + send_status = + sqp->ibqp.qp_type == IB_QPT_RC ? + IB_WC_REM_INV_REQ_ERR : + IB_WC_SUCCESS; wc.status = IB_WC_LOC_QP_OP_ERR; goto err; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -248,7 +248,6 @@ struct ipoib_cm_tx { struct list_head list; struct net_device *dev; struct ipoib_neigh *neigh; - struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; unsigned int tx_head; unsigned int tx_tail; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path neigh->cm = tx; tx->neigh = neigh; - tx->path = path; tx->dev = dev; list_add(&tx->list, &priv->cm.start_list); set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); @@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); + memcpy(&pathrec, &path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h @@ -2579,9 +2579,10 @@ struct ib_device { const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; + /* - * Provides synchronization between device unregistration and netlink - * commands on a device. To be used only by core. + * Positive refcount indicates that the device is currently + * registered and cannot be unregistered. */ refcount_t refcount; struct completion unreg_completion; @@ -3926,6 +3927,25 @@ static inline bool ib_access_writable(int access_flags) int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); +/** + * ib_device_try_get: Hold a registration lock + * device: The device to lock + * + * A device under an active registration lock cannot become unregistered. It + * is only possible to obtain a registration lock on a device that is fully + * registered, otherwise this function returns false. + * + * The registration lock is only necessary for actions which require the + * device to still be registered. Uses that only require the device pointer to + * be valid should use get_device(&ibdev->dev) to hold the memory. + * + */ +static inline bool ib_device_try_get(struct ib_device *dev) +{ + return refcount_inc_not_zero(&dev->refcount); +} + +void ib_device_put(struct ib_device *device); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h @@ -52,6 +52,11 @@ struct hns_roce_ib_create_srq { __aligned_u64 que_addr; }; +struct hns_roce_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + struct hns_roce_ib_create_qp { __aligned_u64 buf_addr; __aligned_u64 db_addr;