whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 6aa293d8ff0939802a6c86cee6cd152c1b0a7a0d
parent 04a17edeca524b71dbb5be41a7002d247fbf34c0
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed,  2 Jan 2019 18:39:22 -0800

Merge branch 'for-linus-4.21-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml

Pull UML updates from Richard Weinberger:

 - DISCARD support for our block device driver

 - Many TLB flush optimizations

 - Various smaller fixes

 - And most important, Anton agreed to help me maintaining UML

* 'for-linus-4.21-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  um: Remove obsolete reenable_XX calls
  um: writev needs <sys/uio.h>
  Add Anton Ivanov to UML maintainers
  um: remove redundant generic-y
  um: Optimize Flush TLB for force/fork case
  um: Avoid marking pages with "changed protection"
  um: Skip TLB flushing where not needed
  um: Optimize TLB operations v2
  um: Remove unnecessary faulted check in uaccess.c
  um: Add support for DISCARD in the UBD Driver
  um: Remove unsafe printks from the io thread
  um: Clean-up command processing in UML UBD driver
  um: Switch to block-mq constants in the UML UBD driver
  um: Make GCOV depend on !KCOV
  um: Include sys/uio.h to have writev()
  um: Add HAVE_DEBUG_BUGVERBOSE
  um: Update maintainers file entry

Diffstat:
MMAINTAINERS | 3++-
March/um/Kconfig | 1+
March/um/Kconfig.debug | 1+
March/um/drivers/chan_kern.c | 10----------
March/um/drivers/line.c | 10----------
March/um/drivers/mconsole_kern.c | 2--
March/um/drivers/net_kern.c | 2--
March/um/drivers/port_kern.c | 1-
March/um/drivers/random.c | 1-
March/um/drivers/ubd_kern.c | 231++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
March/um/drivers/vector_user.c | 2++
March/um/include/asm/Kbuild | 2--
March/um/include/asm/pgtable.h | 9++++++++-
March/um/include/shared/irq_user.h | 1-
March/um/include/shared/os.h | 1+
March/um/kernel/irq.c | 6------
March/um/kernel/sigio.c | 1-
March/um/kernel/skas/uaccess.c | 23++++++++++++-----------
March/um/kernel/tlb.c | 115++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
March/um/os-Linux/file.c | 10++++++++++
20 files changed, 259 insertions(+), 173 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS @@ -15951,15 +15951,16 @@ F: drivers/media/usb/zr364xx/ USER-MODE LINUX (UML) M: Jeff Dike <jdike@addtoit.com> M: Richard Weinberger <richard@nod.at> +M: Anton Ivanov <anton.ivanov@cambridgegreys.com> L: linux-um@lists.infradead.org W: http://user-mode-linux.sourceforge.net +Q: https://patchwork.ozlabs.org/project/linux-um/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git S: Maintained F: Documentation/virtual/uml/ F: arch/um/ F: arch/x86/um/ F: fs/hostfs/ -F: fs/hppfs/ USERSPACE COPYIN/COPYOUT (UIOVEC) M: Alexander Viro <viro@zeniv.linux.org.uk> diff --git a/arch/um/Kconfig b/arch/um/Kconfig @@ -12,6 +12,7 @@ config UML select HAVE_UID16 select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_BUGVERBOSE select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug @@ -16,6 +16,7 @@ config GPROF config GCOV bool "Enable gcov support" depends on DEBUG_INFO + depends on !KCOV help This option allows developers to retrieve coverage data from a UML session. diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c @@ -211,12 +211,6 @@ void deactivate_chan(struct chan *chan, int irq) deactivate_fd(chan->fd, irq); } -void reactivate_chan(struct chan *chan, int irq) -{ - if (chan && chan->enabled) - reactivate_fd(chan->fd, irq); -} - int write_chan(struct chan *chan, const char *buf, int len, int write_irq) { @@ -228,8 +222,6 @@ int write_chan(struct chan *chan, const char *buf, int len, n = chan->ops->write(chan->fd, buf, len, chan->data); if (chan->primary) { ret = n; - if ((ret == -EAGAIN) || ((ret >= 0) && (ret < len))) - reactivate_fd(chan->fd, write_irq); } return ret; } @@ -527,8 +519,6 @@ void chan_interrupt(struct line *line, int irq) tty_insert_flip_char(port, c, TTY_NORMAL); } while (err > 0); - if (err == 0) - reactivate_fd(chan->fd, irq); if (err == -EIO) { if (chan->primary) { tty_port_tty_hangup(&line->port, false); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c @@ -235,14 +235,6 @@ void line_unthrottle(struct tty_struct *tty) line->throttled = 0; chan_interrupt(line, line->driver->read_irq); - - /* - * Maybe there is enough stuff pending that calling the interrupt - * throttles us again. In this case, line->throttled will be 1 - * again and we shouldn't turn the interrupt back on. - */ - if (!line->throttled) - reactivate_chan(line->chan_in, line->driver->read_irq); } static irqreturn_t line_write_interrupt(int irq, void *data) @@ -667,8 +659,6 @@ static irqreturn_t winch_interrupt(int irq, void *data) tty_kref_put(tty); } out: - if (winch->fd != -1) - reactivate_fd(winch->fd, WINCH_IRQ); return IRQ_HANDLED; } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c @@ -96,7 +96,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id) } if (!list_empty(&mc_requests)) schedule_work(&mconsole_work); - reactivate_fd(fd, MCONSOLE_IRQ); return IRQ_HANDLED; } @@ -240,7 +239,6 @@ void mconsole_stop(struct mc_request *req) (*req->cmd->handler)(req); } os_set_fd_block(req->originating_fd, 0); - reactivate_fd(req->originating_fd, MCONSOLE_IRQ); mconsole_reply(req, "", 0, 0); } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c @@ -137,8 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id) schedule_work(&lp->work); goto out; } - reactivate_fd(lp->fd, UM_ETH_IRQ); - out: spin_unlock(&lp->lock); return IRQ_HANDLED; diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c @@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused) if (!port->has_connection) continue; - reactivate_fd(port->fd, ACCEPT_IRQ); while (port_accept(port)) ; port->has_connection = 0; diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c @@ -73,7 +73,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, return ret ? : -EAGAIN; atomic_inc(&host_sleep_count); - reactivate_fd(random_fd, RANDOM_IRQ); add_sigio_fd(random_fd); add_wait_queue(&host_read_wait, &wait); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2018 Cambridge Greys Ltd * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) * Licensed under the GPL @@ -43,11 +44,11 @@ #include <os.h> #include "cow.h" -enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH }; +/* Max request size is determined by sector mask - 32K */ +#define UBD_MAX_REQUEST (8 * sizeof(long)) struct io_thread_req { struct request *req; - enum ubd_req op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; @@ -153,6 +154,7 @@ struct ubd { struct openflags openflags; unsigned shared:1; unsigned no_cow:1; + unsigned no_trim:1; struct cow cow; struct platform_device pdev; struct request_queue *queue; @@ -176,6 +178,7 @@ struct ubd { .boot_openflags = OPEN_FLAGS, \ .openflags = OPEN_FLAGS, \ .no_cow = 0, \ + .no_trim = 0, \ .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ @@ -322,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) *index_out = n; err = -EINVAL; - for (i = 0; i < sizeof("rscd="); i++) { + for (i = 0; i < sizeof("rscdt="); i++) { switch (*str) { case 'r': flags.w = 0; @@ -336,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) case 'c': ubd_dev->shared = 1; break; + case 't': + ubd_dev->no_trim = 1; + break; case '=': str++; goto break_loop; default: *error_out = "Expected '=' or flag letter " - "(r, s, c, or d)"; + "(r, s, c, t or d)"; goto out; } str++; @@ -414,6 +420,7 @@ __uml_help(ubd_setup, " 'c' will cause the device to be treated as being shared between multiple\n" " UMLs and file locking will be turned off - this is appropriate for a\n" " cluster filesystem and inappropriate at almost all other times.\n\n" +" 't' will disable trim/discard support on the device (enabled by default).\n\n" ); static int udb_setup(char *str) @@ -511,16 +518,21 @@ static void ubd_handler(void) } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { struct io_thread_req *io_req = (*irq_req_buffer)[count]; - int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK; - - if (!blk_update_request(io_req->req, err, io_req->length)) - __blk_mq_end_request(io_req->req, err); + if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { + blk_queue_max_discard_sectors(io_req->req->q, 0); + blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); + } + if ((io_req->error) || (io_req->buffer == NULL)) + blk_mq_end_request(io_req->req, io_req->error); + else { + if (!blk_update_request(io_req->req, io_req->error, io_req->length)) + __blk_mq_end_request(io_req->req, io_req->error); + } kfree(io_req); } } - - reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -789,7 +801,7 @@ static int ubd_open_dev(struct ubd *ubd_dev) if((fd == -ENOENT) && create_cow){ fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, - ubd_dev->openflags, 1 << 9, PAGE_SIZE, + ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE, &ubd_dev->cow.bitmap_offset, &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset); @@ -830,6 +842,14 @@ static int ubd_open_dev(struct ubd *ubd_dev) if(err < 0) goto error; ubd_dev->cow.fd = err; } + if (ubd_dev->no_trim == 0) { + ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; + ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE; + blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); + blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue); + } + blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); return 0; error: os_close_file(ubd_dev->fd); @@ -882,7 +902,7 @@ static int ubd_disk_register(int major, u64 size, int unit, return 0; } -#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) static const struct blk_mq_ops ubd_mq_ops = { .queue_rq = ubd_queue_rq, @@ -1234,10 +1254,10 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, __u64 bitmap_offset, unsigned long *bitmap_words, __u64 bitmap_len) { - __u64 sector = io_offset >> 9; + __u64 sector = io_offset >> SECTOR_SHIFT; int i, update_bitmap = 0; - for(i = 0; i < length >> 9; i++){ + for (i = 0; i < length >> SECTOR_SHIFT; i++) { if(cow_mask != NULL) ubd_set_bit(i, (unsigned char *) cow_mask); if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) @@ -1271,14 +1291,14 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, __u64 bitmap_offset, __u64 bitmap_len) { - __u64 sector = req->offset >> 9; + __u64 sector = req->offset >> SECTOR_SHIFT; int i; - if(req->length > (sizeof(req->sector_mask) * 8) << 9) + if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT) panic("Operation too long"); - if(req->op == UBD_READ) { - for(i = 0; i < req->length >> 9; i++){ + if (req_op(req->req) == REQ_OP_READ) { + for (i = 0; i < req->length >> SECTOR_SHIFT; i++) { if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) &req->sector_mask); @@ -1307,68 +1327,86 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, io_req->fds[0] = dev->fd; io_req->error = 0; - if (req_op(req) == REQ_OP_FLUSH) { - io_req->op = UBD_FLUSH; - } else { - io_req->fds[1] = dev->fd; - io_req->cow_offset = -1; - io_req->offset = off; - io_req->length = bvec->bv_len; - io_req->sector_mask = 0; - io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; - io_req->offsets[0] = 0; - io_req->offsets[1] = dev->cow.data_offset; + if (bvec != NULL) { io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; - io_req->sectorsize = 1 << 9; - - if (dev->cow.file) { - cowify_req(io_req, dev->cow.bitmap, - dev->cow.bitmap_offset, dev->cow.bitmap_len); - } + io_req->length = bvec->bv_len; + } else { + io_req->buffer = NULL; + io_req->length = blk_rq_bytes(req); } + io_req->sectorsize = SECTOR_SIZE; + io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; + io_req->offset = off; + io_req->sector_mask = 0; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + + if (dev->cow.file) + cowify_req(io_req, dev->cow.bitmap, + dev->cow.bitmap_offset, dev->cow.bitmap_len); + ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (ret != sizeof(io_req)) { if (ret != -EAGAIN) pr_err("write to io thread failed: %d\n", -ret); kfree(io_req); } - return ret; } +static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req) +{ + struct req_iterator iter; + struct bio_vec bvec; + int ret; + u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT; + + rq_for_each_segment(bvec, req, iter) { + ret = ubd_queue_one_vec(hctx, req, off, &bvec); + if (ret < 0) + return ret; + off += bvec.bv_len; + } + return 0; +} + static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct ubd *ubd_dev = hctx->queue->queuedata; struct request *req = bd->rq; - int ret = 0; + int ret = 0, res = BLK_STS_OK; blk_mq_start_request(req); spin_lock_irq(&ubd_dev->lock); - if (req_op(req) == REQ_OP_FLUSH) { + switch (req_op(req)) { + /* operations with no lentgth/offset arguments */ + case REQ_OP_FLUSH: ret = ubd_queue_one_vec(hctx, req, 0, NULL); - } else { - struct req_iterator iter; - struct bio_vec bvec; - u64 off = (u64)blk_rq_pos(req) << 9; - - rq_for_each_segment(bvec, req, iter) { - ret = ubd_queue_one_vec(hctx, req, off, &bvec); - if (ret < 0) - goto out; - off += bvec.bv_len; - } + break; + case REQ_OP_READ: + case REQ_OP_WRITE: + ret = queue_rw_req(hctx, req); + break; + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); + break; + default: + WARN_ON_ONCE(1); + res = BLK_STS_NOTSUPP; } -out: + spin_unlock_irq(&ubd_dev->lock); if (ret < 0) blk_mq_requeue_request(req, true); - return BLK_STS_OK; + return res; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -1413,39 +1451,60 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode, return -EINVAL; } +static int map_error(int error_code) +{ + switch (error_code) { + case 0: + return BLK_STS_OK; + case ENOSYS: + case EOPNOTSUPP: + return BLK_STS_NOTSUPP; + case ENOSPC: + return BLK_STS_NOSPC; + } + return BLK_STS_IOERR; +} + +/* + * Everything from here onwards *IS NOT PART OF THE KERNEL* + * + * The following functions are part of UML hypervisor code. + * All functions from here onwards are executed as a helper + * thread and are not allowed to execute any kernel functions. + * + * Any communication must occur strictly via shared memory and IPC. + * + * Do not add printks, locks, kernel memory operations, etc - it + * will result in unpredictable behaviour and/or crashes. + */ + static int update_bitmap(struct io_thread_req *req) { int n; if(req->cow_offset == -1) - return 0; + return map_error(0); n = os_pwrite_file(req->fds[1], &req->bitmap_words, sizeof(req->bitmap_words), req->cow_offset); - if(n != sizeof(req->bitmap_words)){ - printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, - req->fds[1]); - return 1; - } + if (n != sizeof(req->bitmap_words)) + return map_error(-n); - return 0; + return map_error(0); } static void do_io(struct io_thread_req *req) { - char *buf; + char *buf = NULL; unsigned long len; int n, nsectors, start, end, bit; __u64 off; - if (req->op == UBD_FLUSH) { + /* FLUSH is really a special case, we cannot "case" it with others */ + + if (req_op(req->req) == REQ_OP_FLUSH) { /* fds[0] is always either the rw image or our cow file */ - n = os_sync_file(req->fds[0]); - if (n != 0) { - printk("do_io - sync failed err = %d " - "fd = %d\n", -n, req->fds[0]); - req->error = 1; - } + req->error = map_error(-os_sync_file(req->fds[0])); return; } @@ -1462,30 +1521,42 @@ static void do_io(struct io_thread_req *req) off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; + if (req->buffer != NULL) + buf = &req->buffer[start * req->sectorsize]; - if(req->op == UBD_READ){ + switch (req_op(req->req)) { + case REQ_OP_READ: n = 0; do { buf = &buf[n]; len -= n; n = os_pread_file(req->fds[bit], buf, len, off); if (n < 0) { - printk("do_io - read failed, err = %d " - "fd = %d\n", -n, req->fds[bit]); - req->error = 1; + req->error = map_error(-n); return; } } while((n < len) && (n != 0)); if (n < len) memset(&buf[n], 0, len - n); - } else { + break; + case REQ_OP_WRITE: n = os_pwrite_file(req->fds[bit], buf, len, off); if(n != len){ - printk("do_io - write failed err = %d " - "fd = %d\n", -n, req->fds[bit]); - req->error = 1; + req->error = map_error(-n); + return; + } + break; + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + n = os_falloc_punch(req->fds[bit], off, len); + if (n) { + req->error = map_error(-n); return; } + break; + default: + WARN_ON_ONCE(1); + req->error = BLK_STS_NOTSUPP; + return; } start = end; @@ -1520,11 +1591,6 @@ int io_thread(void *arg) if (n == -EAGAIN) { ubd_read_poll(-1); continue; - } else { - printk("io_thread - read failed, fd = %d, " - "err = %d," - "reminder = %d\n", - kernel_fd, -n, io_remainder_size); } } @@ -1539,11 +1605,6 @@ int io_thread(void *arg) res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); if (res >= 0) { written += res; - } else { - if (res != -EAGAIN) { - printk("io_thread - write failed, fd = %d, " - "err = %d\n", kernel_fd, -n); - } } if (written < n) { ubd_write_poll(-1); diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c @@ -25,11 +25,13 @@ #include <linux/if_packet.h> #include <sys/socket.h> #include <sys/wait.h> +#include <sys/uio.h> #include <linux/virtio_net.h> #include <netdb.h> #include <stdlib.h> #include <os.h> #include <um_malloc.h> +#include <sys/uio.h> #include "vector_user.h" #define ID_GRE 0 diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild @@ -10,9 +10,7 @@ generic-y += exec.h generic-y += extable.h generic-y += ftrace.h generic-y += futex.h -generic-y += hardirq.h generic-y += hw_irq.h -generic-y += io.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h @@ -197,12 +197,17 @@ static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - pte_clear_bits(pte, _PAGE_RW); + if (likely(pte_get_bits(pte, _PAGE_RW))) + pte_clear_bits(pte, _PAGE_RW); + else + return pte; return(pte_mknewprot(pte)); } static inline pte_t pte_mkread(pte_t pte) { + if (unlikely(pte_get_bits(pte, _PAGE_USER))) + return pte; pte_set_bits(pte, _PAGE_USER); return(pte_mknewprot(pte)); } @@ -221,6 +226,8 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { + if (unlikely(pte_get_bits(pte, _PAGE_RW))) + return pte; pte_set_bits(pte, _PAGE_RW); return(pte_mknewprot(pte)); } diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h @@ -31,7 +31,6 @@ struct irq_fd { struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void free_irq_by_fd(int fd); -extern void reactivate_fd(int fd, int irqnum); extern void deactivate_fd(int fd, int irqnum); extern int deactivate_all_fds(void); extern int activate_ipi(int fd, int pid); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h @@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd); extern unsigned os_major(unsigned long long dev); extern unsigned os_minor(unsigned long long dev); extern unsigned long long os_makedev(unsigned major, unsigned minor); +extern int os_falloc_punch(int fd, unsigned long long offset, int count); /* start_up.c */ extern void os_early_checks(void); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c @@ -350,11 +350,6 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) } -void reactivate_fd(int fd, int irqnum) -{ - /** NOP - we do auto-EOI now **/ -} - void deactivate_fd(int fd, int irqnum) { struct irq_entry *to_free; @@ -449,7 +444,6 @@ int um_request_irq(unsigned int irq, int fd, int type, } EXPORT_SYMBOL(um_request_irq); -EXPORT_SYMBOL(reactivate_fd); /* * irq_chip must define at least enable/disable and ack when diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c @@ -16,7 +16,6 @@ static irqreturn_t sigio_interrupt(int irq, void *data) char c; os_read_file(sigio_irq_fd, &c, sizeof(c)); - reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); return IRQ_HANDLED; } diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c @@ -62,27 +62,28 @@ static int do_op_one_page(unsigned long addr, int len, int is_write, jmp_buf buf; struct page *page; pte_t *pte; - int n, faulted; + int n; pte = maybe_map(addr, is_write); if (pte == NULL) return -1; page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + addr = (unsigned long) page_address(page) + + (addr & ~PAGE_MASK); +#else addr = (unsigned long) kmap_atomic(page) + (addr & ~PAGE_MASK); +#endif + n = (*op)(addr, len, arg); - current->thread.fault_catcher = &buf; - - faulted = UML_SETJMP(&buf); - if (faulted == 0) - n = (*op)(addr, len, arg); - else - n = -1; - - current->thread.fault_catcher = NULL; - +#ifdef CONFIG_64BIT + pagefault_enable(); +#else kunmap_atomic((void *)addr); +#endif return n; } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c @@ -37,17 +37,19 @@ struct host_vm_change { } mprotect; } u; } ops[1]; + int userspace; int index; - struct mm_id *id; + struct mm_struct *mm; void *data; int force; }; -#define INIT_HVC(mm, force) \ +#define INIT_HVC(mm, force, userspace) \ ((struct host_vm_change) \ { .ops = { { .type = NONE } }, \ - .id = &mm->context.id, \ + .mm = mm, \ .data = NULL, \ + .userspace = userspace, \ .index = 0, \ .force = force }) @@ -68,18 +70,40 @@ static int do_ops(struct host_vm_change *hvc, int end, op = &hvc->ops[i]; switch (op->type) { case MMAP: - ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len, - op->u.mmap.prot, op->u.mmap.fd, - op->u.mmap.offset, finished, &hvc->data); + if (hvc->userspace) + ret = map(&hvc->mm->context.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.prot, + op->u.mmap.fd, + op->u.mmap.offset, finished, + &hvc->data); + else + map_memory(op->u.mmap.addr, op->u.mmap.offset, + op->u.mmap.len, 1, 1, 1); break; case MUNMAP: - ret = unmap(hvc->id, op->u.munmap.addr, - op->u.munmap.len, finished, &hvc->data); + if (hvc->userspace) + ret = unmap(&hvc->mm->context.id, + op->u.munmap.addr, + op->u.munmap.len, finished, + &hvc->data); + else + ret = os_unmap_memory( + (void *) op->u.munmap.addr, + op->u.munmap.len); + break; case MPROTECT: - ret = protect(hvc->id, op->u.mprotect.addr, - op->u.mprotect.len, op->u.mprotect.prot, - finished, &hvc->data); + if (hvc->userspace) + ret = protect(&hvc->mm->context.id, + op->u.mprotect.addr, + op->u.mprotect.len, + op->u.mprotect.prot, + finished, &hvc->data); + else + ret = os_protect_memory( + (void *) op->u.mprotect.addr, + op->u.mprotect.len, + 1, 1, 1); break; default: printk(KERN_ERR "Unknown op type %d in do_ops\n", @@ -100,9 +124,12 @@ static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, { __u64 offset; struct host_vm_op *last; - int fd, ret = 0; + int fd = -1, ret = 0; - fd = phys_mapping(phys, &offset); + if (hvc->userspace) + fd = phys_mapping(phys, &offset); + else + offset = phys; if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MMAP) && @@ -215,10 +242,11 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | (x ? UM_PROT_EXEC : 0)); if (hvc->force || pte_newpage(*pte)) { - if (pte_present(*pte)) - ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, prot, hvc); - else + if (pte_present(*pte)) { + if (pte_newpage(*pte)) + ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, prot, hvc); + } else ret = add_munmap(addr, PAGE_SIZE, hvc); } else if (pte_newprot(*pte)) ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); @@ -277,9 +305,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, pgd_t *pgd; struct host_vm_change hvc; unsigned long addr = start_addr, next; - int ret = 0; + int ret = 0, userspace = 1; - hvc = INIT_HVC(mm, force); + hvc = INIT_HVC(mm, force, userspace); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end_addr); @@ -314,9 +342,11 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) pmd_t *pmd; pte_t *pte; unsigned long addr, last; - int updated = 0, err; + int updated = 0, err = 0, force = 0, userspace = 0; + struct host_vm_change hvc; mm = &init_mm; + hvc = INIT_HVC(mm, force, userspace); for (addr = start; addr < end;) { pgd = pgd_offset(mm, addr); if (!pgd_present(*pgd)) { @@ -325,8 +355,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) last = end; if (pgd_newpage(*pgd)) { updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); + err = add_munmap(addr, last - addr, &hvc); if (err < 0) panic("munmap failed, errno = %d\n", -err); @@ -342,8 +371,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) last = end; if (pud_newpage(*pud)) { updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); + err = add_munmap(addr, last - addr, &hvc); if (err < 0) panic("munmap failed, errno = %d\n", -err); @@ -359,8 +387,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) last = end; if (pmd_newpage(*pmd)) { updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); + err = add_munmap(addr, last - addr, &hvc); if (err < 0) panic("munmap failed, errno = %d\n", -err); @@ -372,22 +399,25 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) pte = pte_offset_kernel(pmd, addr); if (!pte_present(*pte) || pte_newpage(*pte)) { updated = 1; - err = os_unmap_memory((void *) addr, - PAGE_SIZE); + err = add_munmap(addr, PAGE_SIZE, &hvc); if (err < 0) panic("munmap failed, errno = %d\n", -err); if (pte_present(*pte)) - map_memory(addr, - pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, 1, 1, 1); + err = add_mmap(addr, pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, 0, &hvc); } else if (pte_newprot(*pte)) { updated = 1; - os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); + err = add_mprotect(addr, PAGE_SIZE, 0, &hvc); } addr += PAGE_SIZE; } + if (!err) + err = do_ops(&hvc, hvc.index, 1); + + if (err < 0) + panic("flush_tlb_kernel failed, errno = %d\n", err); return updated; } @@ -491,6 +521,13 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) void flush_tlb_all(void) { + /* + * Don't bother flushing if this address space is about to be + * destroyed. + */ + if (atomic_read(&current->mm->mm_users) == 0) + return; + flush_tlb_mm(current->mm); } @@ -512,6 +549,13 @@ void __flush_tlb_one(unsigned long addr) static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { + /* + * Don't bother flushing if this address space is about to be + * destroyed. + */ + if (atomic_read(&mm->mm_users) == 0) + return; + fix_range_common(mm, start_addr, end_addr, force); } @@ -527,13 +571,6 @@ EXPORT_SYMBOL(flush_tlb_range); void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end) { - /* - * Don't bother flushing if this address space is about to be - * destroyed. - */ - if (atomic_read(&mm->mm_users) == 0) - return; - fix_range(mm, start, end, 0); } diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c @@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor) { return makedev(major, minor); } + +int os_falloc_punch(int fd, unsigned long long offset, int len) +{ + int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len); + + if (n < 0) + return -errno; + return n; +} +