whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit d002924f934ad4d249f6716680433c912831d046
parent 33e17876ea4edcd7f5c01efa78e8d02889261abf
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri, 24 Aug 2018 08:45:19 -0700

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "virtio, vhost: fixes, tweaks

  No new features but a bunch of tweaks such as switching balloon from
  oom notifier to shrinker"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost/scsi: increase VHOST_SCSI_PREALLOC_PROT_SGLS to 2048
  vhost: allow vhost-scsi driver to be built-in
  virtio: pci-legacy: Validate queue pfn
  virtio: mmio-v1: Validate queue PFN
  virtio_balloon: replace oom notifier with shrinker
  virtio-balloon: kzalloc the vb struct
  virtio-balloon: remove BUG() in init_vqs

Diffstat:
Mdrivers/vhost/Kconfig | 2+-
Mdrivers/vhost/scsi.c | 2+-
Mdrivers/virtio/virtio_balloon.c | 125++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mdrivers/virtio/virtio_mmio.c | 20++++++++++++++++++--
Mdrivers/virtio/virtio_pci_legacy.c | 14++++++++++++--
5 files changed, 99 insertions(+), 64 deletions(-)

diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig @@ -12,7 +12,7 @@ config VHOST_NET config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" - depends on TARGET_CORE && EVENTFD && m + depends on TARGET_CORE && EVENTFD select VHOST default n ---help--- diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c @@ -55,7 +55,7 @@ #define VHOST_SCSI_DEFAULT_TAGS 256 #define VHOST_SCSI_PREALLOC_SGLS 2048 #define VHOST_SCSI_PREALLOC_UPAGES 2048 -#define VHOST_SCSI_PREALLOC_PROT_SGLS 512 +#define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c @@ -27,7 +27,6 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/balloon_compaction.h> -#include <linux/oom.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/mount.h> @@ -40,13 +39,8 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 -#define OOM_VBALLOON_DEFAULT_PAGES 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 -static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; -module_param(oom_pages, int, S_IRUSR | S_IWUSR); -MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); - #ifdef CONFIG_BALLOON_COMPACTION static struct vfsmount *balloon_mnt; #endif @@ -86,8 +80,8 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; - /* To register callback in oom notifier call chain */ - struct notifier_block nb; + /* To register a shrinker to shrink memory upon memory pressure */ + struct shrinker shrinker; }; static struct virtio_device_id id_table[] = { @@ -365,38 +359,6 @@ static void update_balloon_size(struct virtio_balloon *vb) &actual); } -/* - * virtballoon_oom_notify - release pages when system is under severe - * memory pressure (called from out_of_memory()) - * @self : notifier block struct - * @dummy: not used - * @parm : returned - number of freed pages - * - * The balancing of memory by use of the virtio balloon should not cause - * the termination of processes while there are pages in the balloon. - * If virtio balloon manages to release some memory, it will make the - * system return and retry the allocation that forced the OOM killer - * to run. - */ -static int virtballoon_oom_notify(struct notifier_block *self, - unsigned long dummy, void *parm) -{ - struct virtio_balloon *vb; - unsigned long *freed; - unsigned num_freed_pages; - - vb = container_of(self, struct virtio_balloon, nb); - if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) - return NOTIFY_OK; - - freed = parm; - num_freed_pages = leak_balloon(vb, oom_pages); - update_balloon_size(vb); - *freed += num_freed_pages; - - return NOTIFY_OK; -} - static void update_balloon_stats_func(struct work_struct *work) { struct virtio_balloon *vb; @@ -455,9 +417,13 @@ static int init_vqs(struct virtio_balloon *vb) num_stats = update_balloon_stats(vb); sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); - if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) - < 0) - BUG(); + err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, + GFP_KERNEL); + if (err) { + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", + __func__); + return err; + } virtqueue_kick(vb->stats_vq); } return 0; @@ -546,6 +512,52 @@ static struct file_system_type balloon_fs = { #endif /* CONFIG_BALLOON_COMPACTION */ +static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + unsigned long pages_to_free, pages_freed = 0; + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; + + /* + * One invocation of leak_balloon can deflate at most + * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it + * multiple times to deflate pages till reaching pages_to_free. + */ + while (vb->num_pages && pages_to_free) { + pages_to_free -= pages_freed; + pages_freed += leak_balloon(vb, pages_to_free); + } + update_balloon_size(vb); + + return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) +{ + unregister_shrinker(&vb->shrinker); +} + +static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) +{ + vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; + vb->shrinker.count_objects = virtio_balloon_shrinker_count; + vb->shrinker.seeks = DEFAULT_SEEKS; + + return register_shrinker(&vb->shrinker); +} + static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; @@ -557,7 +569,7 @@ static int virtballoon_probe(struct virtio_device *vdev) return -EINVAL; } - vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); + vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL); if (!vb) { err = -ENOMEM; goto out; @@ -566,8 +578,6 @@ static int virtballoon_probe(struct virtio_device *vdev) INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); spin_lock_init(&vb->stop_update_lock); - vb->stop_update = false; - vb->num_pages = 0; mutex_init(&vb->balloon_lock); init_waitqueue_head(&vb->acked); vb->vdev = vdev; @@ -578,17 +588,10 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; - vb->nb.notifier_call = virtballoon_oom_notify; - vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; - err = register_oom_notifier(&vb->nb); - if (err < 0) - goto out_del_vqs; - #ifdef CONFIG_BALLOON_COMPACTION balloon_mnt = kern_mount(&balloon_fs); if (IS_ERR(balloon_mnt)) { err = PTR_ERR(balloon_mnt); - unregister_oom_notifier(&vb->nb); goto out_del_vqs; } @@ -597,13 +600,19 @@ static int virtballoon_probe(struct virtio_device *vdev) if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); kern_unmount(balloon_mnt); - unregister_oom_notifier(&vb->nb); - vb->vb_dev_info.inode = NULL; goto out_del_vqs; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif - + /* + * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a + * shrinker needs to be registered to relieve memory pressure. + */ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { + err = virtio_balloon_register_shrinker(vb); + if (err) + goto out_del_vqs; + } virtio_device_ready(vdev); if (towards_target(vb)) @@ -635,8 +644,8 @@ static void virtballoon_remove(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; - unregister_oom_notifier(&vb->nb); - + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) + virtio_balloon_unregister_shrinker(vb); spin_lock_irq(&vb->stop_update_lock); vb->stop_update = true; spin_unlock_irq(&vb->stop_update_lock); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c @@ -397,9 +397,23 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, /* Activate the queue */ writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); if (vm_dev->version == 1) { + u64 q_pfn = virtqueue_get_desc_addr(vq) >> PAGE_SHIFT; + + /* + * virtio-mmio v1 uses a 32bit QUEUE PFN. If we have something + * that doesn't fit in 32bit, fail the setup rather than + * pretending to be successful. + */ + if (q_pfn >> 32) { + dev_err(&vdev->dev, + "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + 0x1ULL << (32 + PAGE_SHIFT - 30)); + err = -E2BIG; + goto error_bad_pfn; + } + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN); - writel(virtqueue_get_desc_addr(vq) >> PAGE_SHIFT, - vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + writel(q_pfn, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); } else { u64 addr; @@ -430,6 +444,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, return vq; +error_bad_pfn: + vring_del_virtqueue(vq); error_new_virtqueue: if (vm_dev->version == 1) { writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c @@ -122,6 +122,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtqueue *vq; u16 num; int err; + u64 q_pfn; /* Select the queue we're interested in */ iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); @@ -141,9 +142,17 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!vq) return ERR_PTR(-ENOMEM); + q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + if (q_pfn >> 32) { + dev_err(&vp_dev->pci_dev->dev, + "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + 0x1ULL << (32 + PAGE_SHIFT - 30)); + err = -E2BIG; + goto out_del_vq; + } + /* activate the queue */ - iowrite32(virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; @@ -160,6 +169,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, out_deactivate: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +out_del_vq: vring_del_virtqueue(vq); return ERR_PTR(err); }