whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 4f1cbe078546914538d8aabba04db984da68dcbf
parent 3e28fb0fcb69dbedfe254939143198b46d83bfa1
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri,  5 Apr 2019 15:34:33 -1000

Merge tag 'for-5.1/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Two queue_limits stacking fixes: disable discards if the underlying
   driver does, and propagate BDI_CAP_STABLE_WRITES to fix sporadic
   checksum errors.

 - Revert a DM core limit that was not needed, given that dm-crypt was
   already updated to impose an equivalent limit.

 - Fix dm-init to properly establish 'const' for __initconst array.

 - Fix deadlock in DM integrity target that occurs when overlapping I/O
   is being issued to it, plus two smaller fixes to the DM integrity
   target.

* tag 'for-5.1/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm integrity: fix deadlock with overlapping I/O
  dm: disable DISCARD if the underlying storage no longer supports it
  dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors
  dm: revert 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE")
  dm init: fix const confusion for dm_allowed_targets array
  dm integrity: make dm_integrity_init and dm_integrity_exit static
  dm integrity: change memcmp to strncmp in dm_integrity_ctr

Diffstat:
M drivers/md/dm-core.h | 1 +
M drivers/md/dm-init.c | 2 +-
M drivers/md/dm-integrity.c | 16 +++++++---------
M drivers/md/dm-rq.c | 11 +++++++----
M drivers/md/dm-table.c | 39 +++++++++++++++++++++++++++++++++++++++
M drivers/md/dm.c | 30 +++++++++++++++++-------------
6 files changed, 72 insertions(+), 27 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h @@ -115,6 +115,7 @@ struct mapped_device { struct srcu_struct io_barrier; }; +void disable_discard(struct mapped_device *md); void disable_write_same(struct mapped_device *md); void disable_write_zeroes(struct mapped_device *md); diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c @@ -36,7 +36,7 @@ struct dm_device { struct list_head list; }; -const char *dm_allowed_targets[] __initconst = { +const char * const dm_allowed_targets[] __initconst = { "crypt", "delay", "linear", diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c @@ -913,7 +913,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) { return range1->logical_sector < range2->logical_sector + range2->n_sectors && - range2->logical_sector + range2->n_sectors > range2->logical_sector; + range1->logical_sector + range1->n_sectors > range2->logical_sector; } static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) @@ -959,8 +959,6 @@ static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity struct dm_integrity_range *last_range = list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); struct task_struct *last_range_task; - if (!ranges_overlap(range, last_range)) - break; last_range_task = last_range->task; list_del(&last_range->wait_entry); if (!add_new_range(ic, last_range, false)) { @@ -3185,7 +3183,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { + else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { if (ic->meta_dev) { dm_put_device(ti, ic->meta_dev); ic->meta_dev = NULL; @@ 
-3204,17 +3202,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } ic->sectors_per_block = val >> SECTOR_SHIFT; - } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, "Invalid internal_hash argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { + } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, "Invalid journal_crypt argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { + } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, "Invalid journal_mac argument"); if (r) @@ -3616,7 +3614,7 @@ static struct target_type integrity_target = { .io_hints = dm_integrity_io_hints, }; -int __init dm_integrity_init(void) +static int __init dm_integrity_init(void) { int r; @@ -3635,7 +3633,7 @@ int __init dm_integrity_init(void) return r; } -void dm_integrity_exit(void) +static void __exit dm_integrity_exit(void) { dm_unregister_target(&integrity_target); kmem_cache_destroy(journal_io_cache); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c @@ -222,11 +222,14 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped) } if (unlikely(error == BLK_STS_TARGET)) { - if (req_op(clone) == REQ_OP_WRITE_SAME && - !clone->q->limits.max_write_same_sectors) + if (req_op(clone) == REQ_OP_DISCARD && + !clone->q->limits.max_discard_sectors) + disable_discard(tio->md); + else if (req_op(clone) == REQ_OP_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) disable_write_same(tio->md); - if (req_op(clone) == REQ_OP_WRITE_ZEROES && - 
!clone->q->limits.max_write_zeroes_sectors) + else if (req_op(clone) == REQ_OP_WRITE_ZEROES && + !clone->q->limits.max_write_zeroes_sectors) disable_write_zeroes(tio->md); } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c @@ -1844,6 +1844,36 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } +static int device_requires_stable_pages(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && bdi_cap_stable_pages_required(q->backing_dev_info); +} + +/* + * If any underlying device requires stable pages, a table must require + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. + */ +static bool dm_table_requires_stable_pages(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) + return true; + } + + return false; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1897,6 +1927,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_verify_integrity(t); /* + * Some devices don't use blk_integrity but still want stable pages + * because they do their own checksumming. + */ + if (dm_table_requires_stable_pages(t)) + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + else + q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + + /* * Determine whether or not this queue's I/O timings contribute * to the entropy pool, Only request-based targets use this. 
* Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not diff --git a/drivers/md/dm.c b/drivers/md/dm.c @@ -945,6 +945,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error) } } +void disable_discard(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support DISCARD, disable it */ + limits->max_discard_sectors = 0; + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue); +} + void disable_write_same(struct mapped_device *md) { struct queue_limits *limits = dm_get_queue_limits(md); @@ -970,11 +979,14 @@ static void clone_endio(struct bio *bio) dm_endio_fn endio = tio->ti->type->end_io; if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { - if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bio->bi_disk->queue->limits.max_write_same_sectors) + if (bio_op(bio) == REQ_OP_DISCARD && + !bio->bi_disk->queue->limits.max_discard_sectors) + disable_discard(md); + else if (bio_op(bio) == REQ_OP_WRITE_SAME && + !bio->bi_disk->queue->limits.max_write_same_sectors) disable_write_same(md); - if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bio->bi_disk->queue->limits.max_write_zeroes_sectors) + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) disable_write_zeroes(md); } @@ -1042,15 +1054,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) return -EINVAL; } - /* - * BIO based queue uses its own splitting. When multipage bvecs - * is switched on, size of the incoming bio may be too big to - * be handled in some targets, such as crypt. - * - * When these targets are ready for the big bio, we can remove - * the limit. - */ - ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE); + ti->max_io_len = (uint32_t) len; return 0; }