whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 7f80c7325be49db3fb8b5f343f47691b7999fda7
parent b72f711a4efadfaa8a16f9cb708bfe1ce6125906
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  6 Dec 2018 18:57:04 -0800

Merge tag 'nfs-for-4.20-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client bugfixes from Trond Myklebust:
 "This is mainly fallout from the updates to the SUNRPC code that is
  being triggered from less common combinations of NFS mount options.

  Highlights include:

  Stable fixes:
   - Fix a page leak when using RPCSEC_GSS/krb5p to encrypt data.

  Bugfixes:
   - Fix a regression that causes the RPC receive code to hang
   - Fix call_connect_status() so that it handles tasks that got
     transmitted while queued waiting for the socket lock.
   - Fix a memory leak in call_encode()
   - Fix several other connect races.
   - Fix receive code error handling.
   - Use the discard iterator rather than MSG_TRUNC for compatibility
     with AF_UNIX/AF_LOCAL sockets.
   - nfs: don't dirty kernel pages read by direct-io
   - pnfs/Flexfiles fix to enforce per-mirror stateid only for NFSv4
     data servers"

* tag 'nfs-for-4.20-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  SUNRPC: Don't force a redundant disconnection in xs_read_stream()
  SUNRPC: Fix up socket polling
  SUNRPC: Use the discard iterator rather than MSG_TRUNC
  SUNRPC: Treat EFAULT as a truncated message in xs_read_stream_request()
  SUNRPC: Fix up handling of the XDRBUF_SPARSE_PAGES flag
  SUNRPC: Fix RPC receive hangs
  SUNRPC: Fix a potential race in xprt_connect()
  SUNRPC: Fix a memory leak in call_encode()
  SUNRPC: Fix leak of krb5p encode pages
  SUNRPC: call_connect_status() must handle tasks that got transmitted
  nfs: don't dirty kernel pages read by direct-io
  flexfiles: enforce per-mirror stateid only for v4 DSes

Diffstat:
Mfs/nfs/direct.c | 9++++++++-
Mfs/nfs/flexfilelayout/flexfilelayout.c | 6++++--
Minclude/linux/sunrpc/xdr.h | 1-
Mnet/sunrpc/auth_gss/auth_gss.c | 4++++
Mnet/sunrpc/clnt.c | 8++++++++
Mnet/sunrpc/xprt.c | 13+++++++++++--
Mnet/sunrpc/xprtsock.c | 81+++++++++++++++++++++++++++++++++++++------------------------------------------
7 files changed, 73 insertions(+), 49 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c @@ -98,8 +98,11 @@ struct nfs_direct_req { struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ struct work_struct work; int flags; + /* for write */ #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + /* for read */ +#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ struct nfs_writeverf verf; /* unstable write verifier */ }; @@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (!PageCompound(page) && bytes < hdr->good_bytes) + if (!PageCompound(page) && bytes < hdr->good_bytes && + (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); @@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + if (iter_is_iovec(iter)) + dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1733,7 +1733,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) if (fh) hdr->args.fh = fh; - if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + if (vers == 4 && + !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) goto out_failed; /* @@ -1798,7 +1799,8 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; - if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + if (vers == 4 && + !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) goto out_failed; /* diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h @@ -72,7 +72,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_base = start; buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; - buf->bvec = NULL; buf->pages = NULL; buf->page_len = 0; buf->flags = 0; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c @@ -1791,6 +1791,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp) for (i=0; i < rqstp->rq_enc_pages_num; i++) __free_page(rqstp->rq_enc_pages[i]); kfree(rqstp->rq_enc_pages); + rqstp->rq_release_snd_buf = NULL; } static int @@ -1799,6 +1800,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp) struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; int first, last, i; + if (rqstp->rq_release_snd_buf) + rqstp->rq_release_snd_buf(rqstp); + if (snd_buf->page_len == 0) { rqstp->rq_enc_pages_num = 0; return 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c @@ -1915,6 +1915,13 @@ call_connect_status(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; int status = task->tk_status; + /* Check if the task was already transmitted */ + if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { + xprt_end_transmit(task); + task->tk_action = call_transmit_status; + return; + } + dprint_status(task); trace_rpc_connect_status(task); @@ -2302,6 +2309,7 @@ out_retry: task->tk_status = 0; /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { + xdr_free_bvec(&req->rq_rcv_buf); req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) xprt_conditional_disconnect(req->rq_xprt, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c @@ -826,8 +826,15 @@ void xprt_connect(struct rpc_task *task) return; if (xprt_test_and_set_connecting(xprt)) return; - xprt->stat.connect_start = jiffies; - xprt->ops->connect(xprt, task); + /* Race breaker */ + if (!xprt_connected(xprt)) { + xprt->stat.connect_start = jiffies; + xprt->ops->connect(xprt, task); + } else { + xprt_clear_connecting(xprt); + task->tk_status = 0; + rpc_wake_up_queued_task(&xprt->pending, task); + } } xprt_release_write(xprt, task); } @@ -1623,6 +1630,8 @@ xprt_request_init(struct rpc_task *task) req->rq_snd_buf.buflen = 0; req->rq_rcv_buf.len = 0; req->rq_rcv_buf.buflen = 0; + req->rq_snd_buf.bvec = NULL; + req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c @@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) { size_t i,n; - if (!(buf->flags & XDRBUF_SPARSE_PAGES)) + if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES)) return want; - if (want > buf->page_len) - want = buf->page_len; n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < n; i++) { if (buf->pages[i]) continue; buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); if (!buf->pages[i]) { - buf->page_len = (i * PAGE_SIZE) - buf->page_base; - return buf->page_len; + i *= PAGE_SIZE; + return i > buf->page_base ? i - buf->page_base : 0; } } return want; @@ -378,8 +376,8 @@ static ssize_t xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { - struct kvec kvec = { 0 }; - return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0); + iov_iter_discard(&msg->msg_iter, READ, count); + return sock_recvmsg(sock, msg, flags); } static ssize_t @@ -398,16 +396,17 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) - goto eagain; + goto out; seek = 0; } else { seek -= buf->head[0].iov_len; offset += buf->head[0].iov_len; } - if (seek < buf->page_len) { - want = xs_alloc_sparse_pages(buf, - min_t(size_t, count - offset, buf->page_len), - GFP_NOWAIT); + + want = xs_alloc_sparse_pages(buf, + min_t(size_t, count - offset, buf->page_len), + GFP_NOWAIT); + if (seek < want) { ret = xs_read_bvec(sock, msg, flags, buf->bvec, xdr_buf_pagecount(buf), want + buf->page_base, @@ -418,12 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) - goto eagain; + goto out; seek = 0; } else { - seek -= buf->page_len; - offset += buf->page_len; + seek -= want; + offset += want; } + if (seek < buf->tail[0].iov_len) { want = min_t(size_t, count - offset, buf->tail[0].iov_len); ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); @@ -433,17 +433,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) - goto eagain; + goto out; } else offset += buf->tail[0].iov_len; ret = -EMSGSIZE; - msg->msg_flags |= MSG_TRUNC; out: *read = offset - seek_init; return ret; -eagain: - ret = -EAGAIN; - goto out; sock_err: offset += seek; goto out; @@ -486,19 +482,20 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, if (transport->recv.offset == transport->recv.len) { if (xs_read_stream_request_done(transport)) msg->msg_flags |= MSG_EOR; - return transport->recv.copied; + return read; } switch (ret) { + default: + break; + case -EFAULT: case -EMSGSIZE: - return transport->recv.copied; + msg->msg_flags |= MSG_TRUNC; + return read; case 0: return -ESHUTDOWN; - default: - if (ret < 0) - return ret; } - return -EAGAIN; + return ret < 0 ? ret : read; } static size_t @@ -537,7 +534,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) ret = xs_read_stream_request(transport, msg, flags, req); if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) - xprt_complete_bc_request(req, ret); + xprt_complete_bc_request(req, transport->recv.copied); return ret; } @@ -570,7 +567,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) spin_lock(&xprt->queue_lock); if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) - xprt_complete_rqst(req->rq_task, ret); + xprt_complete_rqst(req->rq_task, transport->recv.copied); xprt_unpin_rqst(req); out: spin_unlock(&xprt->queue_lock); @@ -591,10 +588,8 @@ xs_read_stream(struct sock_xprt *transport, int flags) if (ret <= 0) goto out_err; transport->recv.offset = ret; - if (ret != want) { - ret = -EAGAIN; - goto out_err; - } + if (transport->recv.offset != want) + return transport->recv.offset; transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & RPC_FRAGMENT_SIZE_MASK; transport->recv.offset -= sizeof(transport->recv.fraghdr); @@ -602,6 +597,9 @@ xs_read_stream(struct sock_xprt *transport, int flags) } switch (be32_to_cpu(transport->recv.calldir)) { + default: + msg.msg_flags |= MSG_TRUNC; + break; case RPC_CALL: ret = xs_read_stream_call(transport, &msg, flags); break; @@ -616,6 +614,9 @@ xs_read_stream(struct sock_xprt *transport, int flags) goto out_err; read += ret; if (transport->recv.offset < transport->recv.len) { + if (!(msg.msg_flags & MSG_TRUNC)) + return read; + msg.msg_flags = 0; ret = xs_read_discard(transport->sock, &msg, flags, transport->recv.len - transport->recv.offset); if (ret <= 0) @@ -623,7 +624,7 @@ xs_read_stream(struct sock_xprt *transport, int flags) transport->recv.offset += ret; read += ret; if (transport->recv.offset != transport->recv.len) - return -EAGAIN; + return read; } if (xs_read_stream_request_done(transport)) { trace_xs_stream_read_request(transport); @@ -633,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags) transport->recv.len = 0; return read; out_err: - switch (ret) { - case 0: - case -ESHUTDOWN: - xprt_force_disconnect(&transport->xprt); - return -ESHUTDOWN; - } - return ret; + return ret != 0 ? ret : -ESHUTDOWN; } static void xs_stream_data_receive(struct sock_xprt *transport) @@ -648,12 +643,12 @@ static void xs_stream_data_receive(struct sock_xprt *transport) ssize_t ret = 0; mutex_lock(&transport->recv_mutex); + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); if (transport->sock == NULL) goto out; - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); for (;;) { ret = xs_read_stream(transport, MSG_DONTWAIT); - if (ret <= 0) + if (ret < 0) break; read += ret; cond_resched(); @@ -1345,10 +1340,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport) int err; mutex_lock(&transport->recv_mutex); + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); sk = transport->inet; if (sk == NULL) goto out; - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); for (;;) { skb = skb_recv_udp(sk, 0, 1, &err); if (skb == NULL)