whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 67f2a93099cca369c056e591c82f1d1bc726c945
parent 14a996c383129525e55bab07e4857d08f6b61dda
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed, 12 Dec 2018 18:29:24 -0800

Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM cache metadata to verify that a cache has block before trying
   to continue with operation that requires them.

 - Fix bio-based DM core's dm_make_request() to properly impose device
   limits on individual bios by making use of blk_queue_split().

 - Fix long-standing race with how DM thinp notified userspace of
   thin-pool mode state changes before they were actually made.

 - Fix the zoned target's bio completion handling; this is a fairly
   invassive fix at this stage but it is localized to the zoned target.
   Any zoned target users will benefit from this fix.

* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: bump target version
  dm thin: send event about thin-pool state change _after_ making it
  dm zoned: Fix target BIO completion handling
  dm: call blk_queue_split() to impose device limits on bios
  dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()

Diffstat:
Mdrivers/md/dm-cache-metadata.c | 4++++
Mdrivers/md/dm-thin.c | 72+++++++++++++++++++++++++++++++++++++-----------------------------------
Mdrivers/md/dm-zoned-target.c | 122+++++++++++++++++++++++++------------------------------------------------------
Mdrivers/md/dm.c | 2++
4 files changed, 81 insertions(+), 119 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c @@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, bool dirty_flag; *result = true; + if (from_cblock(cmd->cache_blocks) == 0) + /* Nothing to do */ + return 0; + r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, from_cblock(cmd->cache_blocks), &cmd->dirty_cursor); if (r) { diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c @@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t) struct dm_thin_new_mapping; /* - * The pool runs in 4 modes. Ordered in degraded order for comparisons. + * The pool runs in various modes. Ordered in degraded order for comparisons. */ enum pool_mode { PM_WRITE, /* metadata may be changed */ @@ -282,9 +282,38 @@ struct pool { mempool_t mapping_pool; }; -static enum pool_mode get_pool_mode(struct pool *pool); static void metadata_operation_failed(struct pool *pool, const char *op, int r); +static enum pool_mode get_pool_mode(struct pool *pool) +{ + return pool->pf.mode; +} + +static void notify_of_pool_mode_change(struct pool *pool) +{ + const char *descs[] = { + "write", + "out-of-data-space", + "read-only", + "read-only", + "fail" + }; + const char *extra_desc = NULL; + enum pool_mode mode = get_pool_mode(pool); + + if (mode == PM_OUT_OF_DATA_SPACE) { + if (!pool->pf.error_if_no_space) + extra_desc = " (queue IO)"; + else + extra_desc = " (error IO)"; + } + + dm_table_event(pool->ti->table); + DMINFO("%s: switching pool to %s%s mode", + dm_device_name(pool->pool_md), + descs[(int)mode], extra_desc ? : ""); +} + /* * Target context for a pool. */ @@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws) queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); } -static void notify_of_pool_mode_change_to_oods(struct pool *pool); - /* * We're holding onto IO to allow userland time to react. After the * timeout either the pool will have been resized (and thus back in @@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws) if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { pool->pf.error_if_no_space = true; - notify_of_pool_mode_change_to_oods(pool); + notify_of_pool_mode_change(pool); error_retry_list_with_code(pool, BLK_STS_NOSPC); } } @@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) /*----------------------------------------------------------------*/ -static enum pool_mode get_pool_mode(struct pool *pool) -{ - return pool->pf.mode; -} - -static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) -{ - dm_table_event(pool->ti->table); - DMINFO("%s: switching pool to %s mode", - dm_device_name(pool->pool_md), new_mode); -} - -static void notify_of_pool_mode_change_to_oods(struct pool *pool) -{ - if (!pool->pf.error_if_no_space) - notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)"); - else - notify_of_pool_mode_change(pool, "out-of-data-space (error IO)"); -} - static bool passdown_enabled(struct pool_c *pt) { return pt->adjusted_pf.discard_passdown; @@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) switch (new_mode) { case PM_FAIL: - if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "failure"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_fail; pool->process_discard = process_bio_fail; @@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (!is_read_only_pool_mode(old_mode)) - notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; pool->process_discard = process_bio_success; @@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * alarming rate. Adjust your low water mark if you're * frequently seeing this mode. */ - if (old_mode != new_mode) - notify_of_pool_mode_change_to_oods(pool); pool->out_of_data_space = true; pool->process_bio = process_bio_read_only; pool->process_discard = process_discard_bio; @@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) break; case PM_WRITE: - if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "write"); if (old_mode == PM_OUT_OF_DATA_SPACE) cancel_delayed_work_sync(&pool->no_space_timeout); pool->out_of_data_space = false; @@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * doesn't cause an unexpected mode transition on resume. */ pt->adjusted_pf.mode = new_mode; + + if (old_mode != new_mode) + notify_of_pool_mode_change(pool); } static void abort_transaction(struct pool *pool) @@ -4023,7 +4025,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 20, 0}, + .version = {1, 21, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 20, 0}, + .version = {1, 21, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c @@ -20,7 +20,6 @@ struct dmz_bioctx { struct dm_zone *zone; struct bio *bio; refcount_t ref; - blk_status_t status; }; /* @@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status) { struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK) - bioctx->status = status; - bio_endio(bio); + if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) + bio->bi_status = status; + + if (refcount_dec_and_test(&bioctx->ref)) { + struct dm_zone *zone = bioctx->zone; + + if (zone) { + if (bio->bi_status != BLK_STS_OK && + bio_op(bio) == REQ_OP_WRITE && + dmz_is_seq(zone)) + set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags); + dmz_deactivate_zone(zone); + } + bio_endio(bio); + } } /* - * Partial clone read BIO completion callback. This terminates the + * Completion callback for an internally cloned target BIO. This terminates the * target BIO when there are no more references to its context. */ -static void dmz_read_bio_end_io(struct bio *bio) +static void dmz_clone_endio(struct bio *clone) { - struct dmz_bioctx *bioctx = bio->bi_private; - blk_status_t status = bio->bi_status; + struct dmz_bioctx *bioctx = clone->bi_private; + blk_status_t status = clone->bi_status; - bio_put(bio); + bio_put(clone); dmz_bio_endio(bioctx->bio, status); } /* - * Issue a BIO to a zone. The BIO may only partially process the + * Issue a clone of a target BIO. The clone may only partially process the * original target BIO. */ -static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone, - struct bio *bio, sector_t chunk_block, - unsigned int nr_blocks) +static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, + struct bio *bio, sector_t chunk_block, + unsigned int nr_blocks) { struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - sector_t sector; struct bio *clone; - /* BIO remap sector */ - sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); - - /* If the read is not partial, there is no need to clone the BIO */ - if (nr_blocks == dmz_bio_blocks(bio)) { - /* Setup and submit the BIO */ - bio->bi_iter.bi_sector = sector; - refcount_inc(&bioctx->ref); - generic_make_request(bio); - return 0; - } - - /* Partial BIO: we need to clone the BIO */ clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set); if (!clone) return -ENOMEM; - /* Setup the clone */ - clone->bi_iter.bi_sector = sector; + bio_set_dev(clone, dmz->dev->bdev); + clone->bi_iter.bi_sector = + dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT; - clone->bi_end_io = dmz_read_bio_end_io; + clone->bi_end_io = dmz_clone_endio; clone->bi_private = bioctx; bio_advance(bio, clone->bi_iter.bi_size); - /* Submit the clone */ refcount_inc(&bioctx->ref); generic_make_request(clone); + if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) + zone->wp_block += nr_blocks; + return 0; } @@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone, if (nr_blocks) { /* Valid blocks found: read them */ nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block); - ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks); + ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks); if (ret) return ret; chunk_block += nr_blocks; @@ -229,25 +229,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone, } /* - * Issue a write BIO to a zone. - */ -static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone, - struct bio *bio, sector_t chunk_block, - unsigned int nr_blocks) -{ - struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - - /* Setup and submit the BIO */ - bio_set_dev(bio, dmz->dev->bdev); - bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); - refcount_inc(&bioctx->ref); - generic_make_request(bio); - - if (dmz_is_seq(zone)) - zone->wp_block += nr_blocks; -} - -/* * Write blocks directly in a data zone, at the write pointer. * If a buffer zone is assigned, invalidate the blocks written * in place. @@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz, return -EROFS; /* Submit write */ - dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks); + ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks); + if (ret) + return ret; /* * Validate the blocks in the data zone and invalidate @@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz, return -EROFS; /* Submit write */ - dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks); + ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks); + if (ret) + return ret; /* * Validate the blocks in the buffer zone @@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) bioctx->zone = NULL; bioctx->bio = bio; refcount_set(&bioctx->ref, 1); - bioctx->status = BLK_STS_OK; /* Set the BIO pending in the flush list */ if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) { @@ -624,35 +608,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) } /* - * Completed target BIO processing. - */ -static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) -{ - struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); - - if (bioctx->status == BLK_STS_OK && *error) - bioctx->status = *error; - - if (!refcount_dec_and_test(&bioctx->ref)) - return DM_ENDIO_INCOMPLETE; - - /* Done */ - bio->bi_status = bioctx->status; - - if (bioctx->zone) { - struct dm_zone *zone = bioctx->zone; - - if (*error && bio_op(bio) == REQ_OP_WRITE) { - if (dmz_is_seq(zone)) - set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags); - } - dmz_deactivate_zone(zone); - } - - return DM_ENDIO_DONE; -} - -/* * Get zoned device information. */ static int dmz_get_zoned_device(struct dm_target *ti, char *path) @@ -946,7 +901,6 @@ static struct target_type dmz_type = { .ctr = dmz_ctr, .dtr = dmz_dtr, .map = dmz_map, - .end_io = dmz_end_io, .io_hints = dmz_io_hints, .prepare_ioctl = dmz_prepare_ioctl, .postsuspend = dmz_suspend, diff --git a/drivers/md/dm.c b/drivers/md/dm.c @@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, return ret; } + blk_queue_split(md->queue, &bio); + init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) {