whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit bd799eb63db4c61a5f2dc941672391fbca5bcab4
parent 6ec067e3a4492569699676d75a21fcee22fefc5b
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun,  9 Dec 2018 09:46:54 -0800

Merge tag 'libnvdimm-fixes-4.20-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
 "A regression fix for the Address Range Scrub implementation, yes
  another one, and support for platforms that misalign persistent memory
  relative to the Linux memory hotplug section constraint. Longer term,
  support for sub-section memory hotplug would alleviate alignment
  waste, but until then this hack allows a 'struct page' memmap to be
  established for these misaligned memory regions.

  These have all appeared in a -next release, and thanks to Patrick for
  reporting and testing the alignment padding fix.

  Summary:

   - Unless and until the core mm handles memory hotplug units smaller
     than a section (128M), persistent memory namespaces must be padded
     to section alignment.

     The libnvdimm core already handled section collision with "System
     RAM", but some configurations overlap independent "Persistent
     Memory" ranges within a section, so additional padding injection is
     added for that case.

   - The recent reworks of the ARS (address range scrub) state machine
     to reduce the number of state flags inadvertantly missed a
     conversion of acpi_nfit_ars_rescan() call sites. Fix the regression
     whereby user-requested ARS results in a "short" scrub rather than a
     "long" scrub.

   - Fixup the unit tests to handle / test the 128M section alignment of
     mocked test resources.

* tag 'libnvdimm-fixes-4.20-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  acpi/nfit: Fix user-initiated ARS to be "ARS-long" rather than "ARS-short"
  libnvdimm, pfn: Pad pfn namespaces relative to other regions
  tools/testing/nvdimm: Align test resources to 128M

Diffstat:
Mdrivers/acpi/nfit/core.c | 2+-
Mdrivers/nvdimm/nd-core.h | 2++
Mdrivers/nvdimm/pfn_devs.c | 64+++++++++++++++++++++++++++++++++++++---------------------------
Mdrivers/nvdimm/region_devs.c | 41+++++++++++++++++++++++++++++++++++++++++
Mtools/testing/nvdimm/test/nfit.c | 35+++++++++++++++++++++++++++++++++--
5 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c @@ -1308,7 +1308,7 @@ static ssize_t scrub_store(struct device *dev, if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); - rc = acpi_nfit_ars_rescan(acpi_desc, 0); + rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); } device_unlock(dev); if (rc) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h @@ -111,6 +111,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping, resource_size_t *overlap); resource_size_t nd_blk_available_dpa(struct nd_region *nd_region); resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, + resource_size_t size); resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id); int alias_dpa_busy(struct device *dev, void *data); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c @@ -649,14 +649,47 @@ static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) ALIGN_DOWN(phys, nd_pfn->align)); } +/* + * Check if pmem collides with 'System RAM', or other regions when + * section aligned. Trim it accordingly. + */ +static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc) +{ + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); + const resource_size_t start = nsio->res.start; + const resource_size_t end = start + resource_size(&nsio->res); + resource_size_t adjust, size; + + *start_pad = 0; + *end_trunc = 0; + + adjust = start - PHYS_SECTION_ALIGN_DOWN(start); + size = resource_size(&nsio->res) + adjust; + if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED + || nd_region_conflict(nd_region, start - adjust, size)) + *start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + + /* Now check that end of the range does not collide. */ + adjust = PHYS_SECTION_ALIGN_UP(end) - end; + size = resource_size(&nsio->res) + adjust; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(end, nd_pfn->align) + || nd_region_conflict(nd_region, start, size + adjust)) + *end_trunc = end - phys_pmem_align_down(nd_pfn, end); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; - u32 start_pad = 0, end_trunc = 0; + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t start, size; - struct nd_namespace_io *nsio; struct nd_region *nd_region; + u32 start_pad, end_trunc; struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; @@ -688,30 +721,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memset(pfn_sb, 0, sizeof(*pfn_sb)); - /* - * Check if pmem collides with 'System RAM' when section aligned and - * trim it accordingly - */ - nsio = to_nd_namespace_io(&ndns->dev); - start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); - size = resource_size(&nsio->res); - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { - start = nsio->res.start; - start_pad = PHYS_SECTION_ALIGN_UP(start) - start; - } - - start = nsio->res.start; - size = PHYS_SECTION_ALIGN_UP(start + size) - start; - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED - || !IS_ALIGNED(start + resource_size(&nsio->res), - nd_pfn->align)) { - size = resource_size(&nsio->res); - end_trunc = start + size - phys_pmem_align_down(nd_pfn, - start + size); - } - + trim_pfn_device(nd_pfn, &start_pad, &end_trunc); if (start_pad + end_trunc) dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); @@ -722,7 +732,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ - start += start_pad; + start = nsio->res.start + start_pad; size = resource_size(&nsio->res); npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) / PAGE_SIZE); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c @@ -1184,6 +1184,47 @@ int nvdimm_has_cache(struct nd_region *nd_region) } EXPORT_SYMBOL_GPL(nvdimm_has_cache); +struct conflict_context { + struct nd_region *nd_region; + resource_size_t start, size; +}; + +static int region_conflict(struct device *dev, void *data) +{ + struct nd_region *nd_region; + struct conflict_context *ctx = data; + resource_size_t res_end, region_end, region_start; + + if (!is_memory(dev)) + return 0; + + nd_region = to_nd_region(dev); + if (nd_region == ctx->nd_region) + return 0; + + res_end = ctx->start + ctx->size; + region_start = nd_region->ndr_start; + region_end = region_start + nd_region->ndr_size; + if (ctx->start >= region_start && ctx->start < region_end) + return -EBUSY; + if (res_end > region_start && res_end <= region_end) + return -EBUSY; + return 0; +} + +int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, + resource_size_t size) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + struct conflict_context ctx = { + .nd_region = nd_region, + .start = start, + .size = size, + }; + + return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); +} + void __exit nd_region_devs_exit(void) { ida_destroy(&region_ida); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c @@ -15,6 +15,7 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/libnvdimm.h> +#include <linux/genalloc.h> #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/module.h> @@ -215,6 +216,8 @@ struct nfit_test { static struct workqueue_struct *nfit_wq; +static struct gen_pool *nfit_pool; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1132,6 +1135,9 @@ static void release_nfit_res(void *data) list_del(&nfit_res->list); spin_unlock(&nfit_test_lock); + if (resource_size(&nfit_res->res) >= DIMM_SIZE) + gen_pool_free(nfit_pool, nfit_res->res.start, + resource_size(&nfit_res->res)); vfree(nfit_res->buf); kfree(nfit_res); } @@ -1144,7 +1150,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, GFP_KERNEL); int rc; - if (!buf || !nfit_res) + if (!buf || !nfit_res || !*dma) goto err; rc = devm_add_action(dev, release_nfit_res, nfit_res); if (rc) @@ -1164,6 +1170,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, return nfit_res->buf; err: + if (*dma && size >= DIMM_SIZE) + gen_pool_free(nfit_pool, *dma, size); if (buf) vfree(buf); kfree(nfit_res); @@ -1172,9 +1180,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) { + struct genpool_data_align data = { + .align = SZ_128M, + }; void *buf = vmalloc(size); - *dma = (unsigned long) buf; + if (size >= DIMM_SIZE) + *dma = gen_pool_alloc_algo(nfit_pool, size, + gen_pool_first_fit_align, &data); + else + *dma = (unsigned long) buf; return __test_alloc(t, size, dma, buf); } @@ -2839,6 +2854,17 @@ static __init int nfit_test_init(void) goto err_register; } + nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!nfit_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + for (i = 0; i < NUM_NFITS; i++) { struct nfit_test *nfit_test; struct platform_device *pdev; @@ -2894,6 +2920,9 @@ static __init int nfit_test_init(void) return 0; err_register: + if (nfit_pool) + gen_pool_destroy(nfit_pool); + destroy_workqueue(nfit_wq); for (i = 0; i < NUM_NFITS; i++) if (instances[i]) @@ -2917,6 +2946,8 @@ static __exit void nfit_test_exit(void) platform_driver_unregister(&nfit_test_driver); nfit_test_teardown(); + gen_pool_destroy(nfit_pool); + for (i = 0; i < NUM_NFITS; i++) put_device(&instances[i]->pdev.dev); class_destroy(nfit_test_dimm);