whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 956eb6cb36765e3a79a744e6c9364dab1548e460
parent 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri, 28 Dec 2018 13:57:59 -0800

Merge tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block

Pull aio updates from Jens Axboe:
 "Flushing out pre-patches for the buffered/polled aio series. Some
  fixes in here, but also optimizations"

* tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block:
  aio: abstract out io_event filler helper
  aio: split out iocb copy from io_submit_one()
  aio: use iocb_put() instead of open coding it
  aio: only use blk plugs for > 2 depth submissions
  aio: don't zero entire aio_kiocb aio_get_req()
  aio: separate out ring reservation from req allocation
  aio: use assigned completion handler

Diffstat:
Mfs/aio.c | 144++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 88 insertions(+), 56 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c @@ -70,6 +70,12 @@ struct aio_ring { struct io_event io_events[0]; }; /* 128 bytes + ring size */ +/* + * Plugging is meant to work with larger batches of IOs. If we don't + * have more than the below, then don't bother setting up a plug. + */ +#define AIO_PLUG_THRESHOLD 2 + #define AIO_RING_PAGES 8 struct kioctx_table { @@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr) local_irq_restore(flags); } -static bool get_reqs_available(struct kioctx *ctx) +static bool __get_reqs_available(struct kioctx *ctx) { struct kioctx_cpu *kcpu; bool ret = false; @@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx) spin_unlock_irq(&ctx->completion_lock); } +static bool get_reqs_available(struct kioctx *ctx) +{ + if (__get_reqs_available(ctx)) + return true; + user_refill_reqs_available(ctx); + return __get_reqs_available(ctx); +} + /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. @@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { struct aio_kiocb *req; - if (!get_reqs_available(ctx)) { - user_refill_reqs_available(ctx); - if (!get_reqs_available(ctx)) - return NULL; - } - - req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); + req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); if (unlikely(!req)) - goto out_put; + return NULL; percpu_ref_get(&ctx->reqs); + req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); refcount_set(&req->ki_refcnt, 0); - req->ki_ctx = ctx; + req->ki_eventfd = NULL; return req; -out_put: - put_reqs_available(ctx, 1); - return NULL; } static struct kioctx *lookup_ioctx(unsigned long ctx_id) @@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb) } } +static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, + long res, long res2) +{ + ev->obj = (u64)(unsigned long)iocb->ki_user_iocb; + ev->data = iocb->ki_user_data; + ev->res = res; + ev->res2 = res2; +} + /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - event->obj = (u64)(unsigned long)iocb->ki_user_iocb; - event->data = iocb->ki_user_data; - event->res = res; - event->res2 = res2; + aio_fill_event(event, iocb, res, res2); kunmap_atomic(ev_page); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); @@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) aio_complete(iocb, res, res2); } -static int aio_prep_rw(struct kiocb *req, struct iocb *iocb) +static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) { int ret; @@ -1457,7 +1469,7 @@ out_fput: return ret; } -static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, +static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, bool vectored, bool compat, struct iov_iter *iter) { void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; @@ -1492,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret) ret = -EINTR; /*FALLTHRU*/ default: - aio_complete_rw(req, ret, 0); + req->ki_complete(req, ret, 0); } } -static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, - bool compat) +static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, + bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; @@ -1529,8 +1541,8 @@ out_fput: return ret; } -static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, - bool compat) +static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, + bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; @@ -1585,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work) aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); } -static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) +static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, + bool datasync) { if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)) @@ -1713,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, add_wait_queue(head, &pt->iocb->poll.wait); } -static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) +static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) { struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; @@ -1733,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) if (unlikely(!req->file)) return -EBADF; + req->head = NULL; + req->woken = false; + req->cancelled = false; + apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; @@ -1781,44 +1798,44 @@ out: return 0; } -static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - bool compat) +static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, + struct iocb __user *user_iocb, bool compat) { struct aio_kiocb *req; - struct iocb iocb; ssize_t ret; - if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) - return -EFAULT; - /* enforce forwards compatibility on users */ - if (unlikely(iocb.aio_reserved2)) { + if (unlikely(iocb->aio_reserved2)) { pr_debug("EINVAL: reserve field set\n"); return -EINVAL; } /* prevent overflows */ if (unlikely( - (iocb.aio_buf != (unsigned long)iocb.aio_buf) || - (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || - ((ssize_t)iocb.aio_nbytes < 0) + (iocb->aio_buf != (unsigned long)iocb->aio_buf) || + (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || + ((ssize_t)iocb->aio_nbytes < 0) )) { pr_debug("EINVAL: overflow check\n"); return -EINVAL; } + if (!get_reqs_available(ctx)) + return -EAGAIN; + + ret = -EAGAIN; req = aio_get_req(ctx); if (unlikely(!req)) - return -EAGAIN; + goto out_put_reqs_available; - if (iocb.aio_flags & IOCB_FLAG_RESFD) { + if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * instance of the file* now. The file descriptor must be * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; @@ -1833,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->ki_user_iocb = user_iocb; - req->ki_user_data = iocb.aio_data; + req->ki_user_data = iocb->aio_data; - switch (iocb.aio_lio_opcode) { + switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: - ret = aio_read(&req->rw, &iocb, false, compat); + ret = aio_read(&req->rw, iocb, false, compat); break; case IOCB_CMD_PWRITE: - ret = aio_write(&req->rw, &iocb, false, compat); + ret = aio_write(&req->rw, iocb, false, compat); break; case IOCB_CMD_PREADV: - ret = aio_read(&req->rw, &iocb, true, compat); + ret = aio_read(&req->rw, iocb, true, compat); break; case IOCB_CMD_PWRITEV: - ret = aio_write(&req->rw, &iocb, true, compat); + ret = aio_write(&req->rw, iocb, true, compat); break; case IOCB_CMD_FSYNC: - ret = aio_fsync(&req->fsync, &iocb, false); + ret = aio_fsync(&req->fsync, iocb, false); break; case IOCB_CMD_FDSYNC: - ret = aio_fsync(&req->fsync, &iocb, true); + ret = aio_fsync(&req->fsync, iocb, true); break; case IOCB_CMD_POLL: - ret = aio_poll(req, &iocb); + ret = aio_poll(req, iocb); break; default: - pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); + pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); ret = -EINVAL; break; } @@ -1872,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; return 0; out_put_req: - put_reqs_available(ctx, 1); - percpu_ref_put(&ctx->reqs); if (req->ki_eventfd) eventfd_ctx_put(req->ki_eventfd); - kmem_cache_free(kiocb_cachep, req); + iocb_put(req); +out_put_reqs_available: + put_reqs_available(ctx, 1); return ret; } +static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + bool compat) +{ + struct iocb iocb; + + if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) + return -EFAULT; + + return __io_submit_one(ctx, &iocb, user_iocb, compat); +} + /* sys_io_submit: * Queue the nr iocbs pointed to by iocbpp for processing. Returns * the number of iocbs queued. May return -EINVAL if the aio_context @@ -1912,7 +1940,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, if (nr > ctx->nr_events) nr = ctx->nr_events; - blk_start_plug(&plug); + if (nr > AIO_PLUG_THRESHOLD) + blk_start_plug(&plug); for (i = 0; i < nr; i++) { struct iocb __user *user_iocb; @@ -1925,7 +1954,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, if (ret) break; } - blk_finish_plug(&plug); + if (nr > AIO_PLUG_THRESHOLD) + blk_finish_plug(&plug); percpu_ref_put(&ctx->users); return i ? i : ret; @@ -1952,7 +1982,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, if (nr > ctx->nr_events) nr = ctx->nr_events; - blk_start_plug(&plug); + if (nr > AIO_PLUG_THRESHOLD) + blk_start_plug(&plug); for (i = 0; i < nr; i++) { compat_uptr_t user_iocb; @@ -1965,7 +1996,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, if (ret) break; } - blk_finish_plug(&plug); + if (nr > AIO_PLUG_THRESHOLD) + blk_finish_plug(&plug); percpu_ref_put(&ctx->users); return i ? i : ret;