whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit e6ec2fda2d464938989ecd770be92e492ace3ae1
parent dc6fef2cc57972d4d64d9cd6d26b81060e1db0e6
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sat, 19 Jan 2019 05:53:41 +1200

Merge tag 'for-linus-5.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen fixes from Juergen Gross:

 - Several fixes for the Xen pvcalls drivers (1 fix for the backend and
   8 for the frontend).

 - A fix for a rather longstanding bug in the Xen sched_clock()
   interface which led to weird time jumps when migrating the system.

 - A fix for avoiding accesses to x2apic MSRs in Xen PV guests.

* tag 'for-linus-5.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: Fix x86 sched_clock() interface for xen
  pvcalls-front: fix potential null dereference
  always clear the X2APIC_ENABLE bit for PV guest
  pvcalls-front: Avoid get_free_pages(GFP_KERNEL) under spinlock
  xen/pvcalls: remove set but not used variable 'intf'
  pvcalls-back: set -ENOTCONN in pvcalls_conn_back_read
  pvcalls-front: don't return error when the ring is full
  pvcalls-front: properly allocate sk
  pvcalls-front: don't try to free unallocated rings
  pvcalls-front: read all data before closing the connection

Diffstat:
March/x86/xen/enlighten_pv.c | 5+----
March/x86/xen/time.c | 12+++++++++---
Mdrivers/xen/events/events_base.c | 2+-
Mdrivers/xen/pvcalls-back.c | 9++++-----
Mdrivers/xen/pvcalls-front.c | 104+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
5 files changed, 90 insertions(+), 42 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c @@ -898,10 +898,7 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) val = native_read_msr_safe(msr, err); switch (msr) { case MSR_IA32_APICBASE: -#ifdef CONFIG_X86_X2APIC - if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31)))) -#endif - val &= ~X2APIC_ENABLE; + val &= ~X2APIC_ENABLE; break; } return val; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c @@ -361,8 +361,6 @@ void xen_timer_resume(void) { int cpu; - pvclock_resume(); - if (xen_clockevent != &xen_vcpuop_clockevent) return; @@ -379,12 +377,15 @@ static const struct pv_time_ops xen_time_ops __initconst = { }; static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; +static u64 xen_clock_value_saved; void xen_save_time_memory_area(void) { struct vcpu_register_time_memory_area t; int ret; + xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset; + if (!xen_clock) return; @@ -404,7 +405,7 @@ void xen_restore_time_memory_area(void) int ret; if (!xen_clock) - return; + goto out; t.addr.v = &xen_clock->pvti; @@ -421,6 +422,11 @@ void xen_restore_time_memory_area(void) if (ret != 0) pr_notice("Cannot restore secondary vcpu_time_info (err %d)", ret); + +out: + /* Need pvclock_resume() before using xen_clocksource_read(). */ + pvclock_resume(); + xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved; } static void xen_setup_vsyscall_time_info(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c @@ -1650,7 +1650,7 @@ void xen_callback_vector(void) xen_have_vector_callback = 0; return; } - pr_info("Xen HVM callback vector for event delivery is enabled\n"); + pr_info_once("Xen HVM callback vector for event delivery is enabled\n"); alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector); } diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c @@ -160,9 +160,10 @@ static void pvcalls_conn_back_read(void *opaque) /* write the data, then modify the indexes */ virt_wmb(); - if (ret < 0) + if (ret < 0) { + atomic_set(&map->read, 0); intf->in_error = ret; - else + } else intf->in_prod = prod + ret; /* update the indexes, then notify the other end */ virt_wmb(); @@ -282,13 +283,11 @@ static int pvcalls_back_socket(struct xenbus_device *dev, static void pvcalls_sk_state_change(struct sock *sock) { struct sock_mapping *map = sock->sk_user_data; - struct pvcalls_data_intf *intf; if (map == NULL) return; - intf = map->ring; - intf->in_error = -ENOTCONN; + atomic_inc(&map->read); notify_remote_via_irq(map->irq); } diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c @@ -31,6 +31,12 @@ #define PVCALLS_NR_RSP_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE) #define PVCALLS_FRONT_MAX_SPIN 5000 +static struct proto pvcalls_proto = { + .name = "PVCalls", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), +}; + struct pvcalls_bedata { struct xen_pvcalls_front_ring ring; grant_ref_t ref; @@ -335,6 +341,42 @@ int pvcalls_front_socket(struct socket *sock) return ret; } +static void free_active_ring(struct sock_mapping *map) +{ + if (!map->active.ring) + return; + + free_pages((unsigned long)map->active.data.in, + map->active.ring->ring_order); + free_page((unsigned long)map->active.ring); +} + +static int alloc_active_ring(struct sock_mapping *map) +{ + void *bytes; + + map->active.ring = (struct pvcalls_data_intf *) + get_zeroed_page(GFP_KERNEL); + if (!map->active.ring) + goto out; + + map->active.ring->ring_order = PVCALLS_RING_ORDER; + bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + PVCALLS_RING_ORDER); + if (!bytes) + goto out; + + map->active.data.in = bytes; + map->active.data.out = bytes + + XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); + + return 0; + +out: + free_active_ring(map); + return -ENOMEM; +} + static int create_active(struct sock_mapping *map, int *evtchn) { void *bytes; @@ -343,15 +385,7 @@ static int create_active(struct sock_mapping *map, int *evtchn) *evtchn = -1; init_waitqueue_head(&map->active.inflight_conn_req); - map->active.ring = (struct pvcalls_data_intf *) - __get_free_page(GFP_KERNEL | __GFP_ZERO); - if (map->active.ring == NULL) - goto out_error; - map->active.ring->ring_order = PVCALLS_RING_ORDER; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PVCALLS_RING_ORDER); - if (bytes == NULL) - goto out_error; + bytes = map->active.data.in; for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) map->active.ring->ref[i] = gnttab_grant_foreign_access( pvcalls_front_dev->otherend_id, @@ -361,10 +395,6 @@ static int create_active(struct sock_mapping *map, int *evtchn) pvcalls_front_dev->otherend_id, pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0); - map->active.data.in = bytes; - map->active.data.out = bytes + - XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); - ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn); if (ret) goto out_error; @@ -385,8 +415,6 @@ static int create_active(struct sock_mapping *map, int *evtchn) out_error: if (*evtchn >= 0) xenbus_free_evtchn(pvcalls_front_dev, *evtchn); - free_pages((unsigned long)map->active.data.in, PVCALLS_RING_ORDER); - free_page((unsigned long)map->active.ring); return ret; } @@ -406,17 +434,24 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, return PTR_ERR(map); bedata = dev_get_drvdata(&pvcalls_front_dev->dev); + ret = alloc_active_ring(map); + if (ret < 0) { + pvcalls_exit_sock(sock); + return ret; + } spin_lock(&bedata->socket_lock); ret = get_request(bedata, &req_id); if (ret < 0) { spin_unlock(&bedata->socket_lock); + free_active_ring(map); pvcalls_exit_sock(sock); return ret; } ret = create_active(map, &evtchn); if (ret < 0) { spin_unlock(&bedata->socket_lock); + free_active_ring(map); pvcalls_exit_sock(sock); return ret; } @@ -469,8 +504,10 @@ static int __write_ring(struct pvcalls_data_intf *intf, virt_mb(); size = pvcalls_queued(prod, cons, array_size); - if (size >= array_size) + if (size > array_size) return -EINVAL; + if (size == array_size) + return 0; if (len > array_size - size) len = array_size - size; @@ -560,15 +597,13 @@ static int __read_ring(struct pvcalls_data_intf *intf, error = intf->in_error; /* get pointers before reading from the ring */ virt_rmb(); - if (error < 0) - return error; size = pvcalls_queued(prod, cons, array_size); masked_prod = pvcalls_mask(prod, array_size); masked_cons = pvcalls_mask(cons, array_size); if (size == 0) - return 0; + return error ?: size; if (len > size) len = size; @@ -780,25 +815,36 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) } } - spin_lock(&bedata->socket_lock); - ret = get_request(bedata, &req_id); - if (ret < 0) { + map2 = kzalloc(sizeof(*map2), GFP_KERNEL); + if (map2 == NULL) { clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); - spin_unlock(&bedata->socket_lock); + pvcalls_exit_sock(sock); + return -ENOMEM; + } + ret = alloc_active_ring(map2); + if (ret < 0) { + clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, + (void *)&map->passive.flags); + kfree(map2); pvcalls_exit_sock(sock); return ret; } - map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); - if (map2 == NULL) { + spin_lock(&bedata->socket_lock); + ret = get_request(bedata, &req_id); + if (ret < 0) { clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); spin_unlock(&bedata->socket_lock); + free_active_ring(map2); + kfree(map2); pvcalls_exit_sock(sock); - return -ENOMEM; + return ret; } + ret = create_active(map2, &evtchn); if (ret < 0) { + free_active_ring(map2); kfree(map2); clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); @@ -839,7 +885,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) received: map2->sock = newsock; - newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL); + newsock->sk = sk_alloc(sock_net(sock->sk), PF_INET, GFP_KERNEL, &pvcalls_proto, false); if (!newsock->sk) { bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; map->passive.inflight_req_id = PVCALLS_INVALID_ID; @@ -1032,8 +1078,8 @@ int pvcalls_front_release(struct socket *sock) spin_lock(&bedata->socket_lock); list_del(&map->list); spin_unlock(&bedata->socket_lock); - if (READ_ONCE(map->passive.inflight_req_id) != - PVCALLS_INVALID_ID) { + if (READ_ONCE(map->passive.inflight_req_id) != PVCALLS_INVALID_ID && + READ_ONCE(map->passive.inflight_req_id) != 0) { pvcalls_front_free_map(bedata, map->passive.accept_map); }