whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit d548e65904ae43b0637d200a2441fc94e0589c30
parent 77d0b194b2df04a1992f882d96ff4e2bd8bb8fe0
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed,  2 Jan 2019 18:54:45 -0800

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio/vhost updates from Michael Tsirkin:
"Features, fixes, cleanups:

   - discard in virtio blk

   - misc fixes and cleanups"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost: correct the related warning message
  vhost: split structs into a separate header file
  virtio: remove deprecated VIRTIO_PCI_CONFIG()
  vhost/vsock: switch to a mutex for vhost_vsock_hash
  virtio_blk: add discard and write zeroes support

Diffstat:
Mdrivers/block/virtio_blk.c | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mdrivers/vhost/scsi.c | 4++--
Mdrivers/vhost/vsock.c | 16++++++++--------
Mdrivers/virtio/virtio_pci_legacy.c | 6++++--
Minclude/uapi/linux/vhost.h | 113++-----------------------------------------------------------------------------
Ainclude/uapi/linux/vhost_types.h | 128+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/uapi/linux/virtio_blk.h | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 279 insertions(+), 125 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c @@ -18,6 +18,7 @@ #define PART_BITS 4 #define VQ_NAME_LEN 16 +#define MAX_DISCARD_SEGMENTS 256u static int major; static DEFINE_IDA(vd_index_ida); @@ -172,10 +173,48 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } +static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) +{ + unsigned short segments = blk_rq_nr_discard_segments(req); + unsigned short n = 0; + struct virtio_blk_discard_write_zeroes *range; + struct bio *bio; + u32 flags = 0; + + if (unmap) + flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; + + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); + if (!range) + return -ENOMEM; + + __rq_for_each_bio(bio, req) { + u64 sector = bio->bi_iter.bi_sector; + u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; + + range[n].flags = cpu_to_le32(flags); + range[n].num_sectors = cpu_to_le32(num_sectors); + range[n].sector = cpu_to_le64(sector); + n++; + } + + req->special_vec.bv_page = virt_to_page(range); + req->special_vec.bv_offset = offset_in_page(range); + req->special_vec.bv_len = sizeof(*range) * segments; + req->rq_flags |= RQF_SPECIAL_PAYLOAD; + + return 0; +} + static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { + kfree(page_address(req->special_vec.bv_page) + + req->special_vec.bv_offset); + } + switch (req_op(req)) { case REQ_OP_SCSI_IN: case REQ_OP_SCSI_OUT: @@ -239,6 +278,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, int qid = hctx->queue_num; int err; bool notify = false; + bool unmap = false; u32 type; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); @@ -251,6 +291,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, case REQ_OP_FLUSH: type = VIRTIO_BLK_T_FLUSH; break; + case REQ_OP_DISCARD: + type = VIRTIO_BLK_T_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + type = VIRTIO_BLK_T_WRITE_ZEROES; + unmap = !(req->cmd_flags & REQ_NOUNMAP); + break; case REQ_OP_SCSI_IN: case REQ_OP_SCSI_OUT: type = VIRTIO_BLK_T_SCSI_CMD; @@ -270,6 +317,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { + err = virtblk_setup_discard_write_zeroes(req, unmap); + if (err) + return BLK_STS_RESOURCE; + } + num = blk_rq_map_sg(hctx->queue, req, vbr->sg); if (num) { if (rq_data_dir(req) == WRITE) @@ -817,6 +870,32 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err && opt_io_size) blk_queue_io_opt(q, blk_size * opt_io_size); + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { + q->limits.discard_granularity = blk_size; + + virtio_cread(vdev, struct virtio_blk_config, + discard_sector_alignment, &v); + q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; + + virtio_cread(vdev, struct virtio_blk_config, + max_discard_sectors, &v); + blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); + + virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, + &v); + blk_queue_max_discard_segments(q, + min_not_zero(v, + MAX_DISCARD_SEGMENTS)); + + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + } + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { + virtio_cread(vdev, struct virtio_blk_config, + max_write_zeroes_sectors, &v); + blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); + } + virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); @@ -910,14 +989,14 @@ static unsigned int features_legacy[] = { VIRTIO_BLK_F_SCSI, #endif VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, - VIRTIO_BLK_F_MQ, + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, } ; static unsigned int features[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, - VIRTIO_BLK_F_MQ, + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, }; static struct virtio_driver virtio_blk = { diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c @@ -884,7 +884,7 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, if (unlikely(!copy_from_iter_full(vc->req, vc->req_size, &vc->out_iter))) { - vq_err(vq, "Faulted on copy_from_iter\n"); + vq_err(vq, "Faulted on copy_from_iter_full\n"); } else if (unlikely(*vc->lunp != 1)) { /* virtio-scsi spec requires byte 0 of the lun to be 1 */ vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp); @@ -1436,7 +1436,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, se_tpg = &tpg->se_tpg; ret = target_depend_item(&se_tpg->tpg_group.cg_item); if (ret) { - pr_warn("configfs_depend_item() failed: %d\n", ret); + pr_warn("target_depend_item() failed: %d\n", ret); kfree(vs_tpg); mutex_unlock(&tpg->tv_tpg_mutex); goto out; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c @@ -27,14 +27,14 @@ enum { }; /* Used to track all the vhost_vsock instances on the system. */ -static DEFINE_SPINLOCK(vhost_vsock_lock); +static DEFINE_MUTEX(vhost_vsock_mutex); static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); struct vhost_vsock { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; - /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */ + /* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */ struct hlist_node hash; struct vhost_work send_pkt_work; @@ -51,7 +51,7 @@ static u32 vhost_transport_get_local_cid(void) return VHOST_VSOCK_DEFAULT_HOST_CID; } -/* Callers that dereference the return value must hold vhost_vsock_lock or the +/* Callers that dereference the return value must hold vhost_vsock_mutex or the * RCU read lock. */ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) @@ -584,10 +584,10 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) { struct vhost_vsock *vsock = file->private_data; - spin_lock_bh(&vhost_vsock_lock); + mutex_lock(&vhost_vsock_mutex); if (vsock->guest_cid) hash_del_rcu(&vsock->hash); - spin_unlock_bh(&vhost_vsock_lock); + mutex_unlock(&vhost_vsock_mutex); /* Wait for other CPUs to finish using vsock */ synchronize_rcu(); @@ -631,10 +631,10 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) return -EINVAL; /* Refuse if CID is already in use */ - spin_lock_bh(&vhost_vsock_lock); + mutex_lock(&vhost_vsock_mutex); other = vhost_vsock_get(guest_cid); if (other && other != vsock) { - spin_unlock_bh(&vhost_vsock_lock); + mutex_unlock(&vhost_vsock_mutex); return -EADDRINUSE; } @@ -643,7 +643,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) vsock->guest_cid = guest_cid; hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); - spin_unlock_bh(&vhost_vsock_lock); + mutex_unlock(&vhost_vsock_mutex); return 0; } diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c @@ -52,7 +52,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); void __iomem *ioaddr = vp_dev->ioaddr + - VIRTIO_PCI_CONFIG(vp_dev) + offset; + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + + offset; u8 *ptr = buf; int i; @@ -67,7 +68,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); void __iomem *ioaddr = vp_dev->ioaddr + - VIRTIO_PCI_CONFIG(vp_dev) + offset; + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + + offset; const u8 *ptr = buf; int i; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h @@ -11,94 +11,9 @@ * device configuration. */ +#include <linux/vhost_types.h> #include <linux/types.h> -#include <linux/compiler.h> #include <linux/ioctl.h> -#include <linux/virtio_config.h> -#include <linux/virtio_ring.h> - -struct vhost_vring_state { - unsigned int index; - unsigned int num; -}; - -struct vhost_vring_file { - unsigned int index; - int fd; /* Pass -1 to unbind from file. */ - -}; - -struct vhost_vring_addr { - unsigned int index; - /* Option flags. */ - unsigned int flags; - /* Flag values: */ - /* Whether log address is valid. If set enables logging. */ -#define VHOST_VRING_F_LOG 0 - - /* Start of array of descriptors (virtually contiguous) */ - __u64 desc_user_addr; - /* Used structure address. Must be 32 bit aligned */ - __u64 used_user_addr; - /* Available structure address. Must be 16 bit aligned */ - __u64 avail_user_addr; - /* Logging support. */ - /* Log writes to used structure, at offset calculated from specified - * address. Address must be 32 bit aligned. */ - __u64 log_guest_addr; -}; - -/* no alignment requirement */ -struct vhost_iotlb_msg { - __u64 iova; - __u64 size; - __u64 uaddr; -#define VHOST_ACCESS_RO 0x1 -#define VHOST_ACCESS_WO 0x2 -#define VHOST_ACCESS_RW 0x3 - __u8 perm; -#define VHOST_IOTLB_MISS 1 -#define VHOST_IOTLB_UPDATE 2 -#define VHOST_IOTLB_INVALIDATE 3 -#define VHOST_IOTLB_ACCESS_FAIL 4 - __u8 type; -}; - -#define VHOST_IOTLB_MSG 0x1 -#define VHOST_IOTLB_MSG_V2 0x2 - -struct vhost_msg { - int type; - union { - struct vhost_iotlb_msg iotlb; - __u8 padding[64]; - }; -}; - -struct vhost_msg_v2 { - __u32 type; - __u32 reserved; - union { - struct vhost_iotlb_msg iotlb; - __u8 padding[64]; - }; -}; - -struct vhost_memory_region { - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ - __u64 userspace_addr; - __u64 flags_padding; /* No flags are currently specified. */ -}; - -/* All region addresses and sizes must be 4K aligned. */ -#define VHOST_PAGE_SIZE 0x1000 - -struct vhost_memory { - __u32 nregions; - __u32 padding; - struct vhost_memory_region regions[0]; -}; /* ioctls */ @@ -186,31 +101,7 @@ struct vhost_memory { * device. This can be used to stop the ring (e.g. for migration). */ #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) -/* Feature bits */ -/* Log all write descriptors. Can be changed while device is active. */ -#define VHOST_F_LOG_ALL 26 -/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ -#define VHOST_NET_F_VIRTIO_NET_HDR 27 - -/* VHOST_SCSI specific definitions */ - -/* - * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. - * - * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + - * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage - * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target. - * All the targets under vhost_wwpn can be seen and used by guset. - */ - -#define VHOST_SCSI_ABI_VERSION 1 - -struct vhost_scsi_target { - int abi_version; - char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ - unsigned short vhost_tpgt; - unsigned short reserved; -}; +/* VHOST_SCSI specific defines */ #define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_VHOST_TYPES_H +#define _LINUX_VHOST_TYPES_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +/* no alignment requirement */ +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 +#define VHOST_IOTLB_MSG_V2 0x2 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_msg_v2 { + __u32 type; + __u32 reserved; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* VHOST_SCSI specific definitions */ + +/* + * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. + * + * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + + * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage + * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target. + * All the targets under vhost_wwpn can be seen and used by guset. + */ + +#define VHOST_SCSI_ABI_VERSION 1 + +struct vhost_scsi_target { + int abi_version; + char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ + unsigned short vhost_tpgt; + unsigned short reserved; +}; + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h @@ -38,6 +38,8 @@ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -86,6 +88,39 @@ struct virtio_blk_config { /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ __u16 num_queues; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ + /* + * The maximum discard sectors (in 512-byte sectors) for + * one segment. + */ + __u32 max_discard_sectors; + /* + * The maximum number of discard segments in a + * discard command. + */ + __u32 max_discard_seg; + /* Discard commands must be aligned to this number of sectors. */ + __u32 discard_sector_alignment; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ + /* + * The maximum number of write zeroes sectors (in 512-byte sectors) in + * one segment. + */ + __u32 max_write_zeroes_sectors; + /* + * The maximum number of segments in a write zeroes + * command. + */ + __u32 max_write_zeroes_seg; + /* + * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the + * deallocation of one or more of the sectors. + */ + __u8 write_zeroes_may_unmap; + + __u8 unused1[3]; } __attribute__((packed)); /* @@ -114,6 +149,12 @@ struct virtio_blk_config { /* Get device ID command */ #define VIRTIO_BLK_T_GET_ID 8 +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeroes command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 @@ -133,6 +174,19 @@ struct virtio_blk_outhdr { __virtio64 sector; }; +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 + +/* Discard/write zeroes range for each request. */ +struct virtio_blk_discard_write_zeroes { + /* discard/write zeroes start sector */ + __le64 sector; + /* number of discard/write zeroes sectors */ + __le32 num_sectors; + /* flags for this range */ + __le32 flags; +}; + #ifndef VIRTIO_BLK_NO_LEGACY struct virtio_scsi_inhdr { __virtio32 errors;