whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit b8c3b8992ff2827482ac1543f84f5024a72c4f2e
parent 66c56cfa64d9dbb9efa8a06c1aece77e8d57ea19
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sat, 12 Jan 2019 13:40:51 -0800

Merge tag 'for-linus-20190112' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph, with little fixes all over the map

 - Loop caching fix for offset/bs change (Jaegeuk Kim)

 - Block documentation tweaks (Jeff, Jon, Weiping, John)

 - null_blk zoned tweak (John)

 - ahch mvebu suspend/resume support. Should have gone into the merge
   window, but there was some confusion on which tree had it. (Miquel)

* tag 'for-linus-20190112' of git://git.kernel.dk/linux-block: (22 commits)
  ata: ahci: mvebu: request PHY suspend/resume for Armada 3700
  ata: ahci: mvebu: add Armada 3700 initialization needed for S2RAM
  ata: ahci: mvebu: do Armada 38x configuration only on relevant SoCs
  ata: ahci: mvebu: remove stale comment
  ata: libahci_platform: comply to PHY framework
  loop: drop caches if offset or block_size are changed
  block: fix kerneldoc comment for blk_attempt_plug_merge()
  nvme: don't initlialize ctrl->cntlid twice
  nvme: introduce NVME_QUIRK_IGNORE_DEV_SUBNQN
  nvme: pad fake subsys NQN vid and ssvid with zeros
  nvme-multipath: zero out ANA log buffer
  nvme-fabrics: unset write/poll queues for discovery controllers
  nvme-tcp: don't ask if controller is fabrics
  nvme-tcp: remove dead code
  nvme-pci: fix out of bounds access in nvme_cqe_pending
  nvme-pci: rerun irq setup on IO queue init errors
  nvme-pci: use the same attributes when freeing host_mem_desc_bufs.
  nvme-pci: fix the wrong setting of nr_maps
  block: doc: add slice_idle_us to bfq documentation
  block: clarify documentation for blk_{start|finish}_plug
  ...

Diffstat:
MDocumentation/ABI/testing/sysfs-block | 9+++++++++
MDocumentation/block/bfq-iosched.txt | 7+++++++
MDocumentation/block/null_blk.txt | 3++-
MDocumentation/block/queue-sysfs.txt | 7+++++++
Mblock/blk-core.c | 20+++++++++++++++++++-
Mdrivers/ata/ahci.h | 2++
Mdrivers/ata/ahci_mvebu.c | 87++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mdrivers/ata/libahci_platform.c | 13+++++++++++++
Mdrivers/block/loop.c | 35+++++++++++++++++++++++++++++++++--
Mdrivers/block/null_blk.h | 1+
Mdrivers/nvme/host/core.c | 19++++++++++---------
Mdrivers/nvme/host/fabrics.c | 2++
Mdrivers/nvme/host/multipath.c | 2++
Mdrivers/nvme/host/nvme.h | 5+++++
Mdrivers/nvme/host/pci.c | 67+++++++++++++++++++++++++++++++++++++++++++++++--------------------
Mdrivers/nvme/host/tcp.c | 16++++++----------
16 files changed, 229 insertions(+), 66 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block @@ -279,3 +279,12 @@ Description: size in 512B sectors of the zones of the device, with the eventual exception of the last zone of the device which may be smaller. + +What: /sys/block/<disk>/queue/io_timeout +Date: November 2018 +Contact: Weiping Zhang <zhangweiping@didiglobal.com> +Description: + io_timeout is the request timeout in milliseconds. If a request + does not complete in this time then the block driver timeout + handler is invoked. That timeout handler can decide to retry + the request, to fail it or to start a device recovery strategy. diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt @@ -357,6 +357,13 @@ video playing/streaming, a very low drop rate may be more important than maximum throughput. In these cases, consider setting the strict_guarantees parameter. +slice_idle_us +------------- + +Controls the same tuning parameter as slice_idle, but in microseconds. +Either tunable can be used to set idling behavior. Afterwards, the +other tunable will reflect the newly set value in sysfs. + strict_guarantees ----------------- diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt @@ -88,7 +88,8 @@ shared_tags=[0/1]: Default: 0 zoned=[0/1]: Default: 0 0: Block device is exposed as a random-access block device. - 1: Block device is exposed as a host-managed zoned block device. + 1: Block device is exposed as a host-managed zoned block device. Requires + CONFIG_BLK_DEV_ZONED. zone_size=[MB]: Default: 256 Per zone size when exposed as a zoned block device. Must be a power of two. diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt @@ -67,6 +67,13 @@ If set to a value larger than 0, the kernel will put the process issuing IO to sleep for this amount of microseconds before entering classic polling. +io_timeout (RW) +--------------- +io_timeout is the request timeout in milliseconds. If a request does not +complete in this time then the block driver timeout handler is invoked. +That timeout handler can decide to retry the request, to fail it or to start +a device recovery strategy. + iostats (RW) ------------- This file is used to control (on/off) the iostats accounting of the diff --git a/block/blk-core.c b/block/blk-core.c @@ -661,7 +661,6 @@ no_merge: * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at * @bio: new bio being queued - * @request_count: out parameter for number of traversed plugged requests * @same_queue_rq: pointer to &struct request that gets filled in when * another request associated with @q is found on the plug list * (optional, may be %NULL) @@ -1683,6 +1682,15 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on); * @plug: The &struct blk_plug that needs to be initialized * * Description: + * blk_start_plug() indicates to the block layer an intent by the caller + * to submit multiple I/O requests in a batch. The block layer may use + * this hint to defer submitting I/Os from the caller until blk_finish_plug() + * is called. However, the block layer may choose to submit requests + * before a call to blk_finish_plug() if the number of queued I/Os + * exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than + * %BLK_PLUG_FLUSH_SIZE. The queued I/Os may also be submitted early if + * the task schedules (see below). + * * Tracking blk_plug inside the task_struct will help with auto-flushing the * pending I/O should the task end up blocking between blk_start_plug() and * blk_finish_plug(). This is important from a performance perspective, but @@ -1765,6 +1773,16 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) blk_mq_flush_plug_list(plug, from_schedule); } +/** + * blk_finish_plug - mark the end of a batch of submitted I/O + * @plug: The &struct blk_plug passed to blk_start_plug() + * + * Description: + * Indicate that a batch of I/O submissions is complete. This function + * must be paired with an initial call to blk_start_plug(). The intent + * is to allow the block layer to optimize I/O submission. See the + * documentation for blk_start_plug() for more information. + */ void blk_finish_plug(struct blk_plug *plug) { if (plug != current->plug) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h @@ -254,6 +254,8 @@ enum { AHCI_HFLAG_IS_MOBILE = (1 << 25), /* mobile chipset, use SATA_MOBILE_LPM_POLICY as default lpm_policy */ + AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during + suspend/resume */ /* ap->flags bits */ diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c @@ -28,6 +28,11 @@ #define AHCI_WINDOW_BASE(win) (0x64 + ((win) << 4)) #define AHCI_WINDOW_SIZE(win) (0x68 + ((win) << 4)) +struct ahci_mvebu_plat_data { + int (*plat_config)(struct ahci_host_priv *hpriv); + unsigned int flags; +}; + static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv, const struct mbus_dram_target_info *dram) { @@ -62,6 +67,35 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv) writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); } +static int ahci_mvebu_armada_380_config(struct ahci_host_priv *hpriv) +{ + const struct mbus_dram_target_info *dram; + int rc = 0; + + dram = mv_mbus_dram_info(); + if (dram) + ahci_mvebu_mbus_config(hpriv, dram); + else + rc = -ENODEV; + + ahci_mvebu_regret_option(hpriv); + + return rc; +} + +static int ahci_mvebu_armada_3700_config(struct ahci_host_priv *hpriv) +{ + u32 reg; + + writel(0, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_ADDR); + + reg = readl(hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); + reg |= BIT(6); + writel(reg, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); + + return 0; +} + /** * ahci_mvebu_stop_engine * @@ -126,13 +160,9 @@ static int ahci_mvebu_resume(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); struct ahci_host_priv *hpriv = host->private_data; - const struct mbus_dram_target_info *dram; + const struct ahci_mvebu_plat_data *pdata = hpriv->plat_data; - dram = mv_mbus_dram_info(); - if (dram) - ahci_mvebu_mbus_config(hpriv, dram); - - ahci_mvebu_regret_option(hpriv); + pdata->plat_config(hpriv); return ahci_platform_resume_host(&pdev->dev); } @@ -154,29 +184,30 @@ static struct scsi_host_template ahci_platform_sht = { static int ahci_mvebu_probe(struct platform_device *pdev) { + const struct ahci_mvebu_plat_data *pdata; struct ahci_host_priv *hpriv; - const struct mbus_dram_target_info *dram; int rc; + pdata = of_device_get_match_data(&pdev->dev); + if (!pdata) + return -EINVAL; + hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); + hpriv->flags |= pdata->flags; + hpriv->plat_data = (void *)pdata; + rc = ahci_platform_enable_resources(hpriv); if (rc) return rc; hpriv->stop_engine = ahci_mvebu_stop_engine; - if (of_device_is_compatible(pdev->dev.of_node, - "marvell,armada-380-ahci")) { - dram = mv_mbus_dram_info(); - if (!dram) - return -ENODEV; - - ahci_mvebu_mbus_config(hpriv, dram); - ahci_mvebu_regret_option(hpriv); - } + rc = pdata->plat_config(hpriv); + if (rc) + goto disable_resources; rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, &ahci_platform_sht); @@ -190,18 +221,28 @@ disable_resources: return rc; } +static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { + .plat_config = ahci_mvebu_armada_380_config, +}; + +static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { + .plat_config = ahci_mvebu_armada_3700_config, + .flags = AHCI_HFLAG_SUSPEND_PHYS, +}; + static const struct of_device_id ahci_mvebu_of_match[] = { - { .compatible = "marvell,armada-380-ahci", }, - { .compatible = "marvell,armada-3700-ahci", }, + { + .compatible = "marvell,armada-380-ahci", + .data = &ahci_mvebu_armada_380_plat_data, + }, + { + .compatible = "marvell,armada-3700-ahci", + .data = &ahci_mvebu_armada_3700_plat_data, + }, { }, }; MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match); -/* - * We currently don't provide power management related operations, - * since there is no suspend/resume support at the platform level for - * Armada 38x for the moment. - */ static struct platform_driver ahci_mvebu_driver = { .probe = ahci_mvebu_probe, .remove = ata_platform_remove_one, diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c @@ -56,6 +56,12 @@ static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) if (rc) goto disable_phys; + rc = phy_set_mode(hpriv->phys[i], PHY_MODE_SATA); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + rc = phy_power_on(hpriv->phys[i]); if (rc) { phy_exit(hpriv->phys[i]); @@ -738,6 +744,9 @@ int ahci_platform_suspend_host(struct device *dev) writel(ctl, mmio + HOST_CTL); readl(mmio + HOST_CTL); /* flush */ + if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS) + ahci_platform_disable_phys(hpriv); + return ata_host_suspend(host, PMSG_SUSPEND); } EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); @@ -756,6 +765,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); int ahci_platform_resume_host(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; int rc; if (dev->power.power_state.event == PM_EVENT_SUSPEND) { @@ -766,6 +776,9 @@ int ahci_platform_resume_host(struct device *dev) ahci_init_controller(host); } + if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS) + ahci_platform_enable_phys(hpriv); + ata_host_resume(host); return 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c @@ -1190,6 +1190,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; } + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1218,6 +1224,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + /* kill_bdev should have truncated all the pages */ + if (lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1443,22 +1457,39 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + int err = 0; + if (lo->lo_state != Lo_bound) return -ENXIO; if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; + if (lo->lo_queue->limits.logical_block_size != arg) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + blk_mq_freeze_queue(lo->lo_queue); + /* kill_bdev should have truncated all the pages */ + if (lo->lo_queue->limits.logical_block_size != arg && + lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); - +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - return 0; + return err; } static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h @@ -97,6 +97,7 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector); #else static inline int null_zone_init(struct nullb_device *dev) { + pr_err("null_blk: CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; } static inline void null_zone_exit(struct nullb_device *dev) {} diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c @@ -2173,18 +2173,20 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct size_t nqnlen; int off; - nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); - if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); - return; - } + if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) { + nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); + if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + return; + } - if (ctrl->vs >= NVME_VS(1, 2, 1)) - dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); + if (ctrl->vs >= NVME_VS(1, 2, 1)) + dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); + } /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, - "nqn.2014.08.org.nvmexpress:%4x%4x", + "nqn.2014.08.org.nvmexpress:%04x%04x", le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn)); off += sizeof(id->sn); @@ -2500,7 +2502,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; - ctrl->cntlid = le16_to_cpup(&id->cntlid); if (id->mdts) max_hw_sectors = 1 << (id->mdts + page_shift - 9); else diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c @@ -874,6 +874,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (opts->discovery_nqn) { opts->kato = 0; opts->nr_io_queues = 0; + opts->nr_write_queues = 0; + opts->nr_poll_queues = 0; opts->duplicate_connect = true; } if (ctrl_loss_tmo < 0) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c @@ -570,6 +570,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) return 0; out_free_ana_log_buf: kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = NULL; out: return error; } @@ -577,5 +578,6 @@ out: void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = NULL; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h @@ -90,6 +90,11 @@ enum nvme_quirks { * Set MEDIUM priority on SQ creation */ NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), + + /* + * Ignore device provided subnqn. + */ + NVME_QUIRK_IGNORE_DEV_SUBNQN = (1 << 8), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c @@ -95,6 +95,7 @@ struct nvme_dev; struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. @@ -1019,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (++nvmeq->cq_head == nvmeq->q_depth) { + if (nvmeq->cq_head == nvmeq->q_depth - 1) { nvmeq->cq_head = 0; nvmeq->cq_phase = !nvmeq->cq_phase; + } else { + nvmeq->cq_head++; } } @@ -1420,6 +1423,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) return 0; } +static void nvme_suspend_io_queues(struct nvme_dev *dev) +{ + int i; + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) + nvme_suspend_queue(&dev->queues[i]); +} + static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; @@ -1885,8 +1896,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev) struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i]; size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i], - le64_to_cpu(desc->addr)); + dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i], + le64_to_cpu(desc->addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(dev->host_mem_desc_bufs); @@ -1952,8 +1964,9 @@ out_free_bufs: while (--i >= 0) { size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr)); + dma_free_attrs(dev->dev, size, bufs[i], + le64_to_cpu(descs[i].addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(bufs); @@ -2132,6 +2145,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) return result; } +static void nvme_disable_io_queues(struct nvme_dev *dev) +{ + if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) + __nvme_disable_io_queues(dev, nvme_admin_delete_cq); +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; @@ -2168,6 +2187,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } while (1); adminq->q_db = dev->dbs; + retry: /* Deregister the admin queue's interrupt */ pci_free_irq(pdev, 0, adminq); @@ -2185,25 +2205,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = max(result - 1, 1); dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; - dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", - dev->io_queues[HCTX_TYPE_DEFAULT], - dev->io_queues[HCTX_TYPE_READ], - dev->io_queues[HCTX_TYPE_POLL]); - /* * Should investigate if there's a performance win from allocating * more queues than interrupt vectors; it might allow the submission * path to scale better, even if the receive path is limited by the * number of interrupts. */ - result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; return result; } set_bit(NVMEQ_ENABLED, &adminq->flags); - return nvme_create_io_queues(dev); + + result = nvme_create_io_queues(dev); + if (result || dev->online_queues < 2) + return result; + + if (dev->online_queues - 1 < dev->max_qid) { + nr_io_queues = dev->online_queues - 1; + nvme_disable_io_queues(dev); + nvme_suspend_io_queues(dev); + goto retry; + } + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", + dev->io_queues[HCTX_TYPE_DEFAULT], + dev->io_queues[HCTX_TYPE_READ], + dev->io_queues[HCTX_TYPE_POLL]); + return 0; } static void nvme_del_queue_end(struct request *req, blk_status_t error) @@ -2248,7 +2277,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) { int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; @@ -2294,7 +2323,6 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.nr_maps = 2; /* default + read */ if (dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.nr_maps++; - dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = @@ -2410,7 +2438,6 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i; bool dead = true; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -2437,13 +2464,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - if (nvme_disable_io_queues(dev, nvme_admin_delete_sq)) - nvme_disable_io_queues(dev, nvme_admin_delete_cq); + nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } - for (i = dev->ctrl.queue_count - 1; i >= 0; i--) - nvme_suspend_queue(&dev->queues[i]); - + nvme_suspend_io_queues(dev); + nvme_suspend_queue(&dev->queues[0]); nvme_pci_disable(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); @@ -2946,6 +2971,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_MEDIUM_PRIO_SQ }, + { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c @@ -1565,8 +1565,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); if (remove) { - if (ctrl->ops->flags & NVME_F_FABRICS) - blk_cleanup_queue(ctrl->connect_q); + blk_cleanup_queue(ctrl->connect_q); blk_mq_free_tag_set(ctrl->tagset); } nvme_tcp_free_io_queues(ctrl); @@ -1587,12 +1586,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_free_io_queues; } - if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) { - ret = PTR_ERR(ctrl->connect_q); - goto out_free_tag_set; - } + ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; } } else { blk_mq_update_nr_hw_queues(ctrl->tagset, @@ -1606,7 +1603,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return 0; out_cleanup_connect_q: - if (new && (ctrl->ops->flags & NVME_F_FABRICS)) + if (new) blk_cleanup_queue(ctrl->connect_q); out_free_tag_set: if (new) @@ -1620,7 +1617,6 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); if (remove) { - free_opal_dev(ctrl->opal_dev); blk_cleanup_queue(ctrl->admin_q); blk_mq_free_tag_set(ctrl->admin_tagset); }