whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit e5a8a1163211e2ab6f71c3545b26d55dc99ff1d3
parent 5610789ad08dacfd96fd684b53d7acd9e628ca20
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sat,  9 Feb 2019 10:26:09 -0800

Merge tag 'for-linus-20190209' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph, fixing namespace locking when
   dealing with the effects log, and a rapid add/remove issue (Keith)

 - blktrace tweak, ensuring requests with -1 sectors are shown (Jan)

 - link power management quirk for a Smasung SSD (Hans)

 - m68k nfblock dynamic major number fix (Chengguang)

 - series fixing blk-iolatency inflight counter issue (Liu)

 - ensure that we clear ->private when setting up the aio kiocb (Mike)

 - __find_get_block_slow() rate limit print (Tetsuo)

* tag 'for-linus-20190209' of git://git.kernel.dk/linux-block:
  blk-mq: remove duplicated definition of blk_mq_freeze_queue
  Blk-iolatency: warn on negative inflight IO counter
  blk-iolatency: fix IO hang due to negative inflight counter
  blktrace: Show requests without sector
  fs: ratelimit __find_get_block_slow() failure message.
  m68k: set proper major_num when specifying module param major_num
  libata: Add NOLPM quirk for SAMSUNG MZ7TE512HMHP-000L1 SSD
  nvme-pci: fix rapid add remove sequence
  nvme: lock NS list changes while handling command effects
  aio: initialize kiocb private in case any filesystems expect it.

Diffstat:
March/m68k/emu/nfblock.c | 10+++++++---
Mblock/blk-iolatency.c | 56++++++++++++++++++++++++++++++++++++++++++++++++--------
Mblock/blk-mq.h | 1-
Mdrivers/ata/libata-core.c | 1+
Mdrivers/nvme/host/core.c | 8+++++++-
Mdrivers/nvme/host/nvme.h | 1+
Mdrivers/nvme/host/pci.c | 22++++++++++++----------
Mfs/aio.c | 1+
Mfs/buffer.c | 19++++++++++---------
Minclude/linux/blktrace_api.h | 8+++++++-
10 files changed, 94 insertions(+), 33 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c @@ -155,18 +155,22 @@ out: static int __init nfhd_init(void) { u32 blocks, bsize; + int ret; int i; nfhd_id = nf_get_id("XHDI"); if (!nfhd_id) return -ENODEV; - major_num = register_blkdev(major_num, "nfhd"); - if (major_num <= 0) { + ret = register_blkdev(major_num, "nfhd"); + if (ret < 0) { pr_warn("nfhd: unable to get major number\n"); - return major_num; + return ret; } + if (!major_num) + major_num = ret; + for (i = NFHD_DEV_OFFSET; i < 24; i++) { if (nfhd_get_capacity(i, 0, &blocks, &bsize)) continue; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c @@ -72,6 +72,7 @@ #include <linux/sched/loadavg.h> #include <linux/sched/signal.h> #include <trace/events/block.h> +#include <linux/blk-mq.h> #include "blk-rq-qos.h" #include "blk-stat.h" @@ -591,6 +592,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 now = ktime_to_ns(ktime_get()); bool issue_as_root = bio_issue_as_root_blkg(bio); bool enabled = false; + int inflight = 0; blkg = bio->bi_blkg; if (!blkg || !bio_flagged(bio, BIO_TRACKED)) @@ -601,6 +603,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) return; enabled = blk_iolatency_enabled(iolat->blkiolat); + if (!enabled) + return; + while (blkg && blkg->parent) { iolat = blkg_to_lat(blkg); if (!iolat) { @@ -609,8 +614,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) } rqw = &iolat->rq_wait; - atomic_dec(&rqw->inflight); - if (!enabled || iolat->min_lat_nsec == 0) + inflight = atomic_dec_return(&rqw->inflight); + WARN_ON_ONCE(inflight < 0); + if (iolat->min_lat_nsec == 0) goto next; iolatency_record_time(iolat, &bio->bi_issue, now, issue_as_root); @@ -754,10 +760,13 @@ int blk_iolatency_init(struct request_queue *q) return 0; } -static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +/* + * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise + * return 0. + */ +static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -766,9 +775,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) BLKIOLATENCY_MAX_WIN_SIZE); if (!oldval && val) - atomic_inc(&blkiolat->enabled); + return 1; if (oldval && !val) - atomic_dec(&blkiolat->enabled); + return -1; + return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -800,6 +810,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; + int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -834,7 +845,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - iolatency_set_min_lat_nsec(blkg, lat_val); + enable = iolatency_set_min_lat_nsec(blkg, lat_val); + if (enable) { + WARN_ON_ONCE(!blk_get_queue(blkg->q)); + blkg_get(blkg); + } + if (oldval != iolat->min_lat_nsec) { iolatency_clear_scaling(blkg); } @@ -842,6 +858,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, ret = 0; out: blkg_conf_finish(&ctx); + if (ret == 0 && enable) { + struct iolatency_grp *tmp = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = tmp->blkiolat; + + blk_mq_freeze_queue(blkg->q); + + if (enable == 1) + atomic_inc(&blkiolat->enabled); + else if (enable == -1) + atomic_dec(&blkiolat->enabled); + else + WARN_ON_ONCE(1); + + blk_mq_unfreeze_queue(blkg->q); + + blkg_put(blkg); + blk_put_queue(blkg->q); + } return ret ?: nbytes; } @@ -977,8 +1011,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); + struct blk_iolatency *blkiolat = iolat->blkiolat; + int ret; - iolatency_set_min_lat_nsec(blkg, 0); + ret = iolatency_set_min_lat_nsec(blkg, 0); + if (ret == 1) + atomic_inc(&blkiolat->enabled); + if (ret == -1) + atomic_dec(&blkiolat->enabled); iolatency_clear_scaling(blkg); } diff --git a/block/blk-mq.h b/block/blk-mq.h @@ -36,7 +36,6 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; -void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c @@ -4554,6 +4554,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, }, + { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c @@ -1253,6 +1253,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, * effects say only one namespace is affected. */ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + mutex_lock(&ctrl->scan_lock); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); } @@ -1281,8 +1282,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) */ if (effects & NVME_CMD_EFFECTS_LBCC) nvme_update_formats(ctrl); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { nvme_unfreeze(ctrl); + mutex_unlock(&ctrl->scan_lock); + } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) @@ -3401,6 +3404,7 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return; + mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { @@ -3409,6 +3413,7 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); out_free_id: + mutex_unlock(&ctrl->scan_lock); kfree(id); down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); @@ -3652,6 +3657,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); + mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h @@ -154,6 +154,7 @@ struct nvme_ctrl { enum nvme_ctrl_state state; bool identified; spinlock_t lock; + struct mutex scan_lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct request_queue *connect_q; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c @@ -2557,16 +2557,7 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); - /* - * Introduce CONNECTING state from nvme-fc/rdma transports to mark the - * initializing procedure here. - */ - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { - dev_warn(dev->ctrl.device, - "failed to mark controller CONNECTING\n"); - goto out; - } - + mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) goto out; @@ -2585,6 +2576,17 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; dev->ctrl.max_segments = NVME_MAX_SEGS; + mutex_unlock(&dev->shutdown_lock); + + /* + * Introduce CONNECTING state from nvme-fc/rdma transports to mark the + * initializing procedure here. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + goto out; + } result = nvme_init_identify(&dev->ctrl); if (result) diff --git a/fs/aio.c b/fs/aio.c @@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) if (unlikely(!req->ki_filp)) return -EBADF; req->ki_complete = aio_complete_rw; + req->private = NULL; req->ki_pos = iocb->aio_offset; req->ki_flags = iocb_flags(req->ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) diff --git a/fs/buffer.c b/fs/buffer.c @@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq)