whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 4d8d9f540b780f7a3688a72275aecd8fd99c99e5
parent a0efc03b7925c4b606b1c49feb394859754e1cc8
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu, 13 Sep 2018 19:16:11 -1000

Merge tag 'for-linus-20180913' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Three fixes that should go into this series. This contains:

   - Increase number of policies supported by blk-cgroup.

     With blk-iolatency, we now have four in kernel, but we had a hard
     limit of three...

   - Fix regression in null_blk, where the zoned supported broke
     queue_mode=0 (bio based).

   - NVMe pull request, with a single fix for an issue in the rdma code"

* tag 'for-linus-20180913' of git://git.kernel.dk/linux-block:
  null_blk: fix zoned support for non-rq based operation
  blk-cgroup: increase number of supported policies
  nvmet-rdma: fix possible bogus dereference under heavy load

Diffstat:
Mblock/blk-cgroup.c | 4+++-
Mdrivers/block/null_blk.h | 17++++++++++-------
Mdrivers/block/null_blk_main.c | 45++++++++++++++++++++++++++++++++++++++-------
Mdrivers/block/null_blk_zoned.c | 34++++++++++++++--------------------
Mdrivers/nvme/target/rdma.c | 27+++++++++++++++++++++++++--
Minclude/linux/blkdev.h | 2+-
6 files changed, 91 insertions(+), 38 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c @@ -1510,8 +1510,10 @@ int blkcg_policy_register(struct blkcg_policy *pol) for (i = 0; i < BLKCG_MAX_POLS; i++) if (!blkcg_policy[i]) break; - if (i >= BLKCG_MAX_POLS) + if (i >= BLKCG_MAX_POLS) { + pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n"); goto err_unlock; + } /* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */ if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) || diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h @@ -87,10 +87,10 @@ struct nullb { #ifdef CONFIG_BLK_DEV_ZONED int null_zone_init(struct nullb_device *dev); void null_zone_exit(struct nullb_device *dev); -blk_status_t null_zone_report(struct nullb *nullb, - struct nullb_cmd *cmd); -void null_zone_write(struct nullb_cmd *cmd); -void null_zone_reset(struct nullb_cmd *cmd); +blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio); +void null_zone_write(struct nullb_cmd *cmd, sector_t sector, + unsigned int nr_sectors); +void null_zone_reset(struct nullb_cmd *cmd, sector_t sector); #else static inline int null_zone_init(struct nullb_device *dev) { @@ -98,11 +98,14 @@ static inline int null_zone_init(struct nullb_device *dev) } static inline void null_zone_exit(struct nullb_device *dev) {} static inline blk_status_t null_zone_report(struct nullb *nullb, - struct nullb_cmd *cmd) + struct bio *bio) { return BLK_STS_NOTSUPP; } -static inline void null_zone_write(struct nullb_cmd *cmd) {} -static inline void null_zone_reset(struct nullb_cmd *cmd) {} +static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector, + unsigned int nr_sectors) +{ +} +static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {} #endif /* CONFIG_BLK_DEV_ZONED */ #endif /* __NULL_BLK_H */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c @@ -1157,16 +1157,33 @@ static void null_restart_queue_async(struct nullb *nullb) } } +static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd) +{ + struct nullb_device *dev = cmd->nq->dev; + + if (dev->queue_mode == NULL_Q_BIO) { + if (bio_op(cmd->bio) == REQ_OP_ZONE_REPORT) { + cmd->error = null_zone_report(nullb, cmd->bio); + return true; + } + } else { + if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { + cmd->error = null_zone_report(nullb, cmd->rq->bio); + return true; + } + } + + return false; +} + static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; int err = 0; - if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { - cmd->error = null_zone_report(nullb, cmd); + if (cmd_report_zone(nullb, cmd)) goto out; - } if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { struct request *rq = cmd->rq; @@ -1234,10 +1251,24 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) cmd->error = errno_to_blk_status(err); if (!cmd->error && dev->zoned) { - if (req_op(cmd->rq) == REQ_OP_WRITE) - null_zone_write(cmd); - else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET) - null_zone_reset(cmd); + sector_t sector; + unsigned int nr_sectors; + int op; + + if (dev->queue_mode == NULL_Q_BIO) { + op = bio_op(cmd->bio); + sector = cmd->bio->bi_iter.bi_sector; + nr_sectors = cmd->bio->bi_iter.bi_size >> 9; + } else { + op = req_op(cmd->rq); + sector = blk_rq_pos(cmd->rq); + nr_sectors = blk_rq_sectors(cmd->rq); + } + + if (op == REQ_OP_WRITE) + null_zone_write(cmd, sector, nr_sectors); + else if (op == REQ_OP_ZONE_RESET) + null_zone_reset(cmd, sector); } out: /* Complete IO by inline, softirq or timer */ diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c @@ -48,8 +48,8 @@ void null_zone_exit(struct nullb_device *dev) kvfree(dev->zones); } -static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq, - unsigned int zno, unsigned int nr_zones) +static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio, + unsigned int zno, unsigned int nr_zones) { struct blk_zone_report_hdr *hdr = NULL; struct bio_vec bvec; @@ -57,7 +57,7 @@ static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq, void *addr; unsigned int zones_to_cpy; - bio_for_each_segment(bvec, rq->bio, iter) { + bio_for_each_segment(bvec, bio, iter) { addr = kmap_atomic(bvec.bv_page); zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone); @@ -84,29 +84,24 @@ static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq, } } -blk_status_t null_zone_report(struct nullb *nullb, - struct nullb_cmd *cmd) +blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio) { struct nullb_device *dev = nullb->dev; - struct request *rq = cmd->rq; - unsigned int zno = null_zone_no(dev, blk_rq_pos(rq)); + unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector); unsigned int nr_zones = dev->nr_zones - zno; - unsigned int max_zones = (blk_rq_bytes(rq) / - sizeof(struct blk_zone)) - 1; + unsigned int max_zones; + max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1; nr_zones = min_t(unsigned int, nr_zones, max_zones); - - null_zone_fill_rq(nullb->dev, rq, zno, nr_zones); + null_zone_fill_bio(nullb->dev, bio, zno, nr_zones); return BLK_STS_OK; } -void null_zone_write(struct nullb_cmd *cmd) +void null_zone_write(struct nullb_cmd *cmd, sector_t sector, + unsigned int nr_sectors) { struct nullb_device *dev = cmd->nq->dev; - struct request *rq = cmd->rq; - sector_t sector = blk_rq_pos(rq); - unsigned int rq_sectors = blk_rq_sectors(rq); unsigned int zno = null_zone_no(dev, sector); struct blk_zone *zone = &dev->zones[zno]; @@ -118,7 +113,7 @@ void null_zone_write(struct nullb_cmd *cmd) case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_IMP_OPEN: /* Writes must be at the write pointer position */ - if (blk_rq_pos(rq) != zone->wp) { + if (sector != zone->wp) { cmd->error = BLK_STS_IOERR; break; } @@ -126,7 +121,7 @@ void null_zone_write(struct nullb_cmd *cmd) if (zone->cond == BLK_ZONE_COND_EMPTY) zone->cond = BLK_ZONE_COND_IMP_OPEN; - zone->wp += rq_sectors; + zone->wp += nr_sectors; if (zone->wp == zone->start + zone->len) zone->cond = BLK_ZONE_COND_FULL; break; @@ -137,11 +132,10 @@ void null_zone_write(struct nullb_cmd *cmd) } } -void null_zone_reset(struct nullb_cmd *cmd) +void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) { struct nullb_device *dev = cmd->nq->dev; - struct request *rq = cmd->rq; - unsigned int zno = null_zone_no(dev, blk_rq_pos(rq)); + unsigned int zno = null_zone_no(dev, sector); struct blk_zone *zone = &dev->zones[zno]; zone->cond = BLK_ZONE_COND_EMPTY; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c @@ -66,6 +66,7 @@ struct nvmet_rdma_rsp { struct nvmet_req req; + bool allocated; u8 n_rdma; u32 flags; u32 invalidate_rkey; @@ -174,11 +175,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) unsigned long flags; spin_lock_irqsave(&queue->rsps_lock, flags); - rsp = list_first_entry(&queue->free_rsps, + rsp = list_first_entry_or_null(&queue->free_rsps, struct nvmet_rdma_rsp, free_list); - list_del(&rsp->free_list); + if (likely(rsp)) + list_del(&rsp->free_list); spin_unlock_irqrestore(&queue->rsps_lock, flags); + if (unlikely(!rsp)) { + rsp = kmalloc(sizeof(*rsp), GFP_KERNEL); + if (unlikely(!rsp)) + return NULL; + rsp->allocated = true; + } + return rsp; } @@ -187,6 +196,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) { unsigned long flags; + if (rsp->allocated) { + kfree(rsp); + return; + } + spin_lock_irqsave(&rsp->queue->rsps_lock, flags); list_add_tail(&rsp->free_list, &rsp->queue->free_rsps); spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); @@ -776,6 +790,15 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + if (unlikely(!rsp)) { + /* + * we get here only under memory pressure, + * silently drop and have the host retry + * as we can't even fail it. + */ + nvmet_rdma_post_recv(queue->dev, cmd); + return; + } rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h @@ -54,7 +54,7 @@ struct blk_stat_callback; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 3 +#define BLKCG_MAX_POLS 5 typedef void (rq_end_io_fn)(struct request *, blk_status_t);