whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 0c86e761b95131943c2b8af2ffb3c0554f9a71f5
parent b3491d8430dd25f0a4e00c33d60da22a9bd9d052
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed, 31 Oct 2018 11:01:38 -0700

Merge tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - EDID interfaces for vfio devices supporting display extensions (Gerd
   Hoffmann)

 - Generically select Type-1 IOMMU model support on ARM/ARM64 (Geert
   Uytterhoeven)

 - Quirk for VFs reporting INTx pin (Alex Williamson)

 - Fix error path memory leak in MSI support (Li Qiang)

* tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio:
  vfio: add edid support to mbochs sample driver
  vfio: add edid api for display (vgpu) devices.
  drivers/vfio: Allow type-1 IOMMU instantiation with all ARM/ARM64 IOMMUs
  vfio/pci: Mask buggy SR-IOV VF INTx support
  vfio/pci: Fix potential memory leak in vfio_msi_cap_len

Diffstat:
Mdrivers/vfio/Kconfig | 2+-
Mdrivers/vfio/pci/vfio_pci.c | 8++++++--
Mdrivers/vfio/pci/vfio_pci_config.c | 31++++++++++++++++++++++++++++++-
Minclude/uapi/linux/vfio.h | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Msamples/vfio-mdev/mbochs.c | 136++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
5 files changed, 204 insertions(+), 23 deletions(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig @@ -21,7 +21,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3) + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c @@ -434,10 +434,14 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) { if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; + + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || + vdev->nointx || vdev->pdev->is_virtfn) + return 0; + pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin) - return 1; + return pin ? 1 : 0; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { u8 pos; u16 flags; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c @@ -1180,8 +1180,10 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) return -ENOMEM; ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags); - if (ret) + if (ret) { + kfree(vdev->msi_perm); return ret; + } return len; } @@ -1610,6 +1612,15 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) } /* + * Nag about hardware bugs, hopefully to have vendors fix them, but at least + * to collect a list of dependencies for the VF INTx pin quirk below. + */ +static const struct pci_device_id known_bogus_vf_intx_pin[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x270c) }, + {} +}; + +/* * For each device we allocate a pci_config_map that indicates the * capability occupying each dword and thus the struct perm_bits we * use for read and write. We also allocate a virtualized config @@ -1674,6 +1685,24 @@ int vfio_config_init(struct vfio_pci_device *vdev) if (pdev->is_virtfn) { *(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor); *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); + + /* + * Per SR-IOV spec rev 1.1, 3.4.1.18 the interrupt pin register + * does not apply to VFs and VFs must implement this register + * as read-only with value zero. Userspace is not readily able + * to identify whether a device is a VF and thus that the pin + * definition on the device is bogus should it violate this + * requirement. We already virtualize the pin register for + * other purposes, so we simply need to replace the bogus value + * and consider VFs when we determine INTx IRQ count. + */ + if (vconfig[PCI_INTERRUPT_PIN] && + !pci_match_id(known_bogus_vf_intx_pin, pdev)) + pci_warn(pdev, + "Hardware bug: VF reports bogus INTx pin %d\n", + vconfig[PCI_INTERRUPT_PIN]); + + vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */ } if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h @@ -303,6 +303,56 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) +#define VFIO_REGION_TYPE_GFX (1) +#define VFIO_REGION_SUBTYPE_GFX_EDID (1) + +/** + * struct vfio_region_gfx_edid - EDID region layout. + * + * Set display link state and EDID blob. + * + * The EDID blob has monitor information such as brand, name, serial + * number, physical size, supported video modes and more. + * + * This special region allows userspace (typically qemu) set a virtual + * EDID for the virtual monitor, which allows a flexible display + * configuration. + * + * For the edid blob spec look here: + * https://en.wikipedia.org/wiki/Extended_Display_Identification_Data + * + * On linux systems you can find the EDID blob in sysfs: + * /sys/class/drm/${card}/${connector}/edid + * + * You can use the edid-decode ulility (comes with xorg-x11-utils) to + * decode the EDID blob. + * + * @edid_offset: location of the edid blob, relative to the + * start of the region (readonly). + * @edid_max_size: max size of the edid blob (readonly). + * @edid_size: actual edid size (read/write). + * @link_state: display link state (read/write). + * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on. + * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off. + * @max_xres: max display width (0 == no limitation, readonly). + * @max_yres: max display height (0 == no limitation, readonly). + * + * EDID update protocol: + * (1) set link-state to down. + * (2) update edid blob and size. + * (3) set link-state to up. + */ +struct vfio_region_gfx_edid { + __u32 edid_offset; + __u32 edid_max_size; + __u32 edid_size; + __u32 max_xres; + __u32 max_yres; + __u32 link_state; +#define VFIO_DEVICE_GFX_LINK_STATE_UP 1 +#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 +}; + /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped * which allows direct access to non-MSIX registers which happened to be within diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c @@ -71,11 +71,19 @@ #define MBOCHS_NAME "mbochs" #define MBOCHS_CLASS_NAME "mbochs" +#define MBOCHS_EDID_REGION_INDEX VFIO_PCI_NUM_REGIONS +#define MBOCHS_NUM_REGIONS (MBOCHS_EDID_REGION_INDEX+1) + #define MBOCHS_CONFIG_SPACE_SIZE 0xff #define MBOCHS_MMIO_BAR_OFFSET PAGE_SIZE #define MBOCHS_MMIO_BAR_SIZE PAGE_SIZE -#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \ +#define MBOCHS_EDID_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \ MBOCHS_MMIO_BAR_SIZE) +#define MBOCHS_EDID_SIZE PAGE_SIZE +#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_EDID_OFFSET + \ + MBOCHS_EDID_SIZE) + +#define MBOCHS_EDID_BLOB_OFFSET (MBOCHS_EDID_SIZE/2) #define STORE_LE16(addr, val) (*(u16 *)addr = val) #define STORE_LE32(addr, val) (*(u32 *)addr = val) @@ -95,16 +103,24 @@ MODULE_PARM_DESC(mem, "megabytes available to " MBOCHS_NAME " devices"); static const struct mbochs_type { const char *name; u32 mbytes; + u32 max_x; + u32 max_y; } mbochs_types[] = { { .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1, .mbytes = 4, + .max_x = 800, + .max_y = 600, }, { .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2, .mbytes = 16, + .max_x = 1920, + .max_y = 1440, }, { .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3, .mbytes = 64, + .max_x = 0, + .max_y = 0, }, }; @@ -115,6 +131,11 @@ static struct cdev mbochs_cdev; static struct device mbochs_dev; static int mbochs_used_mbytes; +struct vfio_region_info_ext { + struct vfio_region_info base; + struct vfio_region_info_cap_type type; +}; + struct mbochs_mode { u32 drm_format; u32 bytepp; @@ -144,13 +165,14 @@ struct mdev_state { u32 memory_bar_mask; struct mutex ops_lock; struct mdev_device *mdev; - struct vfio_device_info dev_info; const struct mbochs_type *type; u16 vbe[VBE_DISPI_INDEX_COUNT]; u64 memsize; struct page **pages; pgoff_t pagecount; + struct vfio_region_gfx_edid edid_regs; + u8 edid_blob[0x400]; struct list_head dmabufs; u32 active_id; @@ -342,10 +364,20 @@ static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset, char *buf, u32 count) { struct device *dev = mdev_dev(mdev_state->mdev); + struct vfio_region_gfx_edid *edid; u16 reg16 = 0; int index; switch (offset) { + case 0x000 ... 0x3ff: /* edid block */ + edid = &mdev_state->edid_regs; + if (edid->link_state != VFIO_DEVICE_GFX_LINK_STATE_UP || + offset >= edid->edid_size) { + memset(buf, 0, count); + break; + } + memcpy(buf, mdev_state->edid_blob + offset, count); + break; case 0x500 ... 0x515: /* bochs dispi interface */ if (count != 2) goto unhandled; @@ -365,6 +397,44 @@ unhandled: } } +static void handle_edid_regs(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count, bool is_write) +{ + char *regs = (void *)&mdev_state->edid_regs; + + if (offset + count > sizeof(mdev_state->edid_regs)) + return; + if (count != 4) + return; + if (offset % 4) + return; + + if (is_write) { + switch (offset) { + case offsetof(struct vfio_region_gfx_edid, link_state): + case offsetof(struct vfio_region_gfx_edid, edid_size): + memcpy(regs + offset, buf, count); + break; + default: + /* read-only regs */ + break; + } + } else { + memcpy(buf, regs + offset, count); + } +} + +static void handle_edid_blob(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count, bool is_write) +{ + if (offset + count > mdev_state->edid_regs.edid_max_size) + return; + if (is_write) + memcpy(mdev_state->edid_blob + offset, buf, count); + else + memcpy(buf, mdev_state->edid_blob + offset, count); +} + static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, loff_t pos, bool is_write) { @@ -384,13 +454,25 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, memcpy(buf, (mdev_state->vconfig + pos), count); } else if (pos >= MBOCHS_MMIO_BAR_OFFSET && - pos + count <= MBOCHS_MEMORY_BAR_OFFSET) { + pos + count <= (MBOCHS_MMIO_BAR_OFFSET + + MBOCHS_MMIO_BAR_SIZE)) { pos -= MBOCHS_MMIO_BAR_OFFSET; if (is_write) handle_mmio_write(mdev_state, pos, buf, count); else handle_mmio_read(mdev_state, pos, buf, count); + } else if (pos >= MBOCHS_EDID_OFFSET && + pos + count <= (MBOCHS_EDID_OFFSET + + MBOCHS_EDID_SIZE)) { + pos -= MBOCHS_EDID_OFFSET; + if (pos < MBOCHS_EDID_BLOB_OFFSET) { + handle_edid_regs(mdev_state, pos, buf, count, is_write); + } else { + pos -= MBOCHS_EDID_BLOB_OFFSET; + handle_edid_blob(mdev_state, pos, buf, count, is_write); + } + } else if (pos >= MBOCHS_MEMORY_BAR_OFFSET && pos + count <= MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) { @@ -471,6 +553,10 @@ static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev) mdev_state->next_id = 1; mdev_state->type = type; + mdev_state->edid_regs.max_xres = type->max_x; + mdev_state->edid_regs.max_yres = type->max_y; + mdev_state->edid_regs.edid_offset = MBOCHS_EDID_BLOB_OFFSET; + mdev_state->edid_regs.edid_max_size = sizeof(mdev_state->edid_blob); mbochs_create_config_space(mdev_state); mbochs_reset(mdev); @@ -932,16 +1018,16 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf) } static int mbochs_get_region_info(struct mdev_device *mdev, - struct vfio_region_info *region_info, - u16 *cap_type_id, void **cap_type) + struct vfio_region_info_ext *ext) { + struct vfio_region_info *region_info = &ext->base; struct mdev_state *mdev_state; mdev_state = mdev_get_drvdata(mdev); if (!mdev_state) return -EINVAL; - if (region_info->index >= VFIO_PCI_NUM_REGIONS) + if (region_info->index >= MBOCHS_NUM_REGIONS) return -EINVAL; switch (region_info->index) { @@ -964,6 +1050,20 @@ static int mbochs_get_region_info(struct mdev_device *mdev, region_info->flags = (VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE); break; + case MBOCHS_EDID_REGION_INDEX: + ext->base.argsz = sizeof(*ext); + ext->base.offset = MBOCHS_EDID_OFFSET; + ext->base.size = MBOCHS_EDID_SIZE; + ext->base.flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS); + ext->base.cap_offset = offsetof(typeof(*ext), type); + ext->type.header.id = VFIO_REGION_INFO_CAP_TYPE; + ext->type.header.version = 1; + ext->type.header.next = 0; + ext->type.type = VFIO_REGION_TYPE_GFX; + ext->type.subtype = VFIO_REGION_SUBTYPE_GFX_EDID; + break; default: region_info->size = 0; region_info->offset = 0; @@ -984,7 +1084,7 @@ static int mbochs_get_device_info(struct mdev_device *mdev, struct vfio_device_info *dev_info) { dev_info->flags = VFIO_DEVICE_FLAGS_PCI; - dev_info->num_regions = VFIO_PCI_NUM_REGIONS; + dev_info->num_regions = MBOCHS_NUM_REGIONS; dev_info->num_irqs = VFIO_PCI_NUM_IRQS; return 0; } @@ -1084,7 +1184,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, unsigned long arg) { int ret = 0; - unsigned long minsz; + unsigned long minsz, outsz; struct mdev_state *mdev_state; mdev_state = mdev_get_drvdata(mdev); @@ -1106,8 +1206,6 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, if (ret) return ret; - memcpy(&mdev_state->dev_info, &info, sizeof(info)); - if (copy_to_user((void __user *)arg, &info, minsz)) return -EFAULT; @@ -1115,24 +1213,24 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, } case VFIO_DEVICE_GET_REGION_INFO: { - struct vfio_region_info info; - u16 cap_type_id = 0; - void *cap_type = NULL; + struct vfio_region_info_ext info; - minsz = offsetofend(struct vfio_region_info, offset); + minsz = offsetofend(typeof(info), base.offset); if (copy_from_user(&info, (void __user *)arg, minsz)) return -EFAULT; - if (info.argsz < minsz) + outsz = info.base.argsz; + if (outsz < minsz) + return -EINVAL; + if (outsz > sizeof(info)) return -EINVAL; - ret = mbochs_get_region_info(mdev, &info, &cap_type_id, - &cap_type); + ret = mbochs_get_region_info(mdev, &info); if (ret) return ret; - if (copy_to_user((void __user *)arg, &info, minsz)) + if (copy_to_user((void __user *)arg, &info, outsz)) return -EFAULT; return 0; @@ -1148,7 +1246,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, return -EFAULT; if ((info.argsz < minsz) || - (info.index >= mdev_state->dev_info.num_irqs)) + (info.index >= VFIO_PCI_NUM_IRQS)) return -EINVAL; ret = mbochs_get_irq_info(mdev, &info);