whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 1fbf3e48123d701584bc75ccac67ef2fe412ac4c
parent f88c5942cfaf7d55e46d395136cccaca65b2e3bf
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue, 12 Mar 2019 14:50:42 -0700

Merge tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fixes for NFS I/O request leakages
   - Fix error handling paths in the NFS I/O recoalescing code
   - Reinitialise NFSv4.1 sequence results before retransmitting a
     request
   - Fix a soft lockup in the delegation recovery code
   - Bulk destroy of layouts needs to be safe w.r.t. umount
   - Prevent thundering herd issues when the SUNRPC socket is not
     connected
   - Respect RPC call timeouts when retrying transmission

  Features:
   - Convert rpc auth layer to use xdr_streams
   - Config option to disable insecure RPCSEC_GSS crypto types
   - Reduce size of RPC receive buffers
   - Readdirplus optimization by cache mechanism
   - Convert SUNRPC socket send code to use iov_iter()
   - SUNRPC micro-optimisations to avoid indirect calls
   - Add support for the pNFS LAYOUTERROR operation and use it with the
     pNFS/flexfiles driver
   - Add trace events to report non-zero NFS status codes
   - Various removals of unnecessary dprintks

  Bugfixes and cleanups:
   - Fix a number of sparse warnings and documentation format warnings
   - Fix nfs_parse_devname to not modify it's argument
   - Fix potential corruption of page being written through pNFS/blocks
   - fix xfstest generic/099 failures on nfsv3
   - Avoid NFSv4.1 "false retries" when RPC calls are interrupted
   - Abort I/O early if the pNFS/flexfiles layout segment was
     invalidated
   - Avoid unnecessary pNFS/flexfiles layout invalidations"

* tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (90 commits)
  SUNRPC: Take the transport send lock before binding+connecting
  SUNRPC: Micro-optimise when the task is known not to be sleeping
  SUNRPC: Check whether the task was transmitted before rebind/reconnect
  SUNRPC: Remove redundant calls to RPC_IS_QUEUED()
  SUNRPC: Clean up
  SUNRPC: Respect RPC call timeouts when retrying transmission
  SUNRPC: Fix up RPC back channel transmission
  SUNRPC: Prevent thundering herd when the socket is not connected
  SUNRPC: Allow dynamic allocation of back channel slots
  NFSv4.1: Bump the default callback session slot count to 16
  SUNRPC: Convert remaining GFP_NOIO, and GFP_NOWAIT sites in sunrpc
  NFS/flexfiles: Clean up mirror DS initialisation
  NFS/flexfiles: Remove dead code in ff_layout_mirror_valid()
  NFS/flexfile: Simplify nfs4_ff_layout_select_ds_stateid()
  NFS/flexfile: Simplify nfs4_ff_layout_ds_version()
  NFS/flexfiles: Simplify ff_layout_get_ds_cred()
  NFS/flexfiles: Simplify nfs4_ff_find_or_create_ds_client()
  NFS/flexfiles: Simplify nfs4_ff_layout_select_ds_fh()
  NFS/flexfiles: Speed up read failover when DSes are down
  NFS/flexfiles: Don't invalidate DS deviceids for being unresponsive
  ...

Diffstat:
Mfs/lockd/clnt4xdr.c | 14--------------
Mfs/lockd/clntxdr.c | 14--------------
Mfs/nfs/callback_xdr.c | 64++++++++++++++++++++++++++++------------------------------------
Mfs/nfs/delegation.c | 22+++++++++++++---------
Mfs/nfs/delegation.h | 1+
Mfs/nfs/dir.c | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mfs/nfs/direct.c | 7+++----
Mfs/nfs/file.c | 44++++++++++++++++++++++++++++----------------
Mfs/nfs/flexfilelayout/flexfilelayout.c | 225++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mfs/nfs/flexfilelayout/flexfilelayout.h | 75++++++++++++++++++++++++++++++++-------------------------------------------
Mfs/nfs/flexfilelayout/flexfilelayoutdev.c | 161++++++++++++++++++++++++++++---------------------------------------------------
Mfs/nfs/inode.c | 33+++++++++++++++++----------------
Mfs/nfs/internal.h | 5++++-
Mfs/nfs/io.c | 12++++++------
Mfs/nfs/namespace.c | 8++++----
Mfs/nfs/nfs2xdr.c | 124++++++++++++++++++++++++++-----------------------------------------------------
Mfs/nfs/nfs3acl.c | 2--
Mfs/nfs/nfs3xdr.c | 209+++++++++++++++++++++++++------------------------------------------------------
Mfs/nfs/nfs42.h | 3+++
Mfs/nfs/nfs42proc.c | 164+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/nfs/nfs42xdr.c | 130+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mfs/nfs/nfs4client.c | 33++++++++++++++++++---------------
Mfs/nfs/nfs4namespace.c | 5+++--
Mfs/nfs/nfs4proc.c | 138++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mfs/nfs/nfs4session.c | 7+++++--
Mfs/nfs/nfs4session.h | 7++++---
Mfs/nfs/nfs4state.c | 1+
Mfs/nfs/nfs4trace.h | 25+++++++++++++++++++++++++
Mfs/nfs/nfs4xdr.c | 530+++++++++++++++++++++++--------------------------------------------------------
Mfs/nfs/nfstrace.c | 1+
Mfs/nfs/nfstrace.h | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/nfs/pagelist.c | 47+++++++++++++++++++++++++++++------------------
Mfs/nfs/pnfs.c | 33+++++++++++++++++++++++----------
Mfs/nfs/pnfs.h | 2++
Mfs/nfs/pnfs_dev.c | 13+++++++++++++
Mfs/nfs/read.c | 2+-
Mfs/nfs/super.c | 2+-
Mfs/nfs/unlink.c | 8+++++---
Mfs/nfs/write.c | 19+++++++++++++++----
Mfs/nfsd/nfs4callback.c | 13-------------
Minclude/linux/nfs4.h | 1+
Minclude/linux/nfs_fs_sb.h | 1+
Minclude/linux/nfs_page.h | 10++++++++++
Minclude/linux/nfs_xdr.h | 37++++++++++++++++++++++++++++++++++++-
Minclude/linux/sunrpc/auth.h | 44++++++++++++++++++++++++++------------------
Minclude/linux/sunrpc/clnt.h | 3+++
Minclude/linux/sunrpc/gss_krb5_enctypes.h | 42+++++++++++++++++++++++++++++++++++++++++-
Minclude/linux/sunrpc/sched.h | 9+++++++++
Minclude/linux/sunrpc/xdr.h | 23+++++++++++++++++++----
Minclude/linux/sunrpc/xprt.h | 7-------
Minclude/linux/sunrpc/xprtsock.h | 1+
Ainclude/trace/events/rpcgss.h | 361+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/trace/events/rpcrdma.h | 12++++++++++--
Minclude/trace/events/sunrpc.h | 361+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mnet/sunrpc/Kconfig | 16++++++++++++++++
Mnet/sunrpc/auth.c | 136++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mnet/sunrpc/auth_gss/Makefile | 2+-
Mnet/sunrpc/auth_gss/auth_gss.c | 553+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mnet/sunrpc/auth_gss/gss_krb5_mech.c | 29+++--------------------------
Mnet/sunrpc/auth_gss/gss_krb5_wrap.c | 8+++++---
Mnet/sunrpc/auth_gss/gss_mech_switch.c | 27+--------------------------
Mnet/sunrpc/auth_gss/gss_rpc_upcall.c | 15+--------------
Mnet/sunrpc/auth_gss/gss_rpc_upcall.h | 16++--------------
Mnet/sunrpc/auth_gss/gss_rpc_xdr.c | 15+--------------
Mnet/sunrpc/auth_gss/gss_rpc_xdr.h | 17+----------------
Mnet/sunrpc/auth_gss/svcauth_gss.c | 3++-
Anet/sunrpc/auth_gss/trace.c | 11+++++++++++
Mnet/sunrpc/auth_null.c | 56++++++++++++++++++++++++++++++--------------------------
Mnet/sunrpc/auth_unix.c | 122++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Mnet/sunrpc/backchannel_rqst.c | 42+++++++++++++++++++++++++-----------------
Mnet/sunrpc/clnt.c | 660++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mnet/sunrpc/sched.c | 17++++++++++-------
Mnet/sunrpc/svc.c | 19++++---------------
Mnet/sunrpc/xdr.c | 121++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mnet/sunrpc/xprt.c | 23+++++++++++++++++------
Mnet/sunrpc/xprtrdma/backchannel.c | 3+--
Mnet/sunrpc/xprtrdma/frwr_ops.c | 4++--
Mnet/sunrpc/xprtrdma/rpc_rdma.c | 22+++++++++++++++++++---
Mnet/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1-
Mnet/sunrpc/xprtrdma/transport.c | 2--
Mnet/sunrpc/xprtrdma/verbs.c | 2++
Mnet/sunrpc/xprtrdma/xprt_rdma.h | 12+++++++++++-
Mnet/sunrpc/xprtsock.c | 317+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
83 files changed, 3366 insertions(+), 2203 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c @@ -75,17 +75,6 @@ static void nlm4_compute_offsets(const struct nlm_lock *lock, } /* - * Handle decode buffer overflows out-of-line. - */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("lockd: %s prematurely hit the end of our receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} - - -/* * Encode/decode NLMv4 basic data types * * Basic NLMv4 data types are defined in Appendix II, section 6.1.4 @@ -176,7 +165,6 @@ out_size: dprintk("NFS: returned cookie was too long: %u\n", length); return -EIO; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -236,7 +224,6 @@ out_bad_xdr: __func__, be32_to_cpup(p)); return -EIO; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -309,7 +296,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) out: return error; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c @@ -71,17 +71,6 @@ static void nlm_compute_offsets(const struct nlm_lock *lock, } /* - * Handle decode buffer overflows out-of-line. - */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("lockd: %s prematurely hit the end of our receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} - - -/* * Encode/decode NLMv3 basic data types * * Basic NLMv3 data types are not defined in an IETF standards @@ -173,7 +162,6 @@ out_size: dprintk("NFS: returned cookie was too long: %u\n", length); return -EIO; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -231,7 +219,6 @@ out_enum: __func__, be32_to_cpup(p)); return -EIO; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -303,7 +290,6 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result) out: return error; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c @@ -72,16 +72,6 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p) return xdr_ressize_check(rqstp, p); } -static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes) -{ - __be32 *p; - - p = xdr_inline_decode(xdr, nbytes); - if (unlikely(p == NULL)) - printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); - return p; -} - static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str, size_t maxlen) { @@ -98,13 +88,13 @@ static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh) { __be32 *p; - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); fh->size = ntohl(*p); if (fh->size > NFS4_FHSIZE) return htonl(NFS4ERR_BADHANDLE); - p = read_buf(xdr, fh->size); + p = xdr_inline_decode(xdr, fh->size); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); memcpy(&fh->data[0], p, fh->size); @@ -117,11 +107,11 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) __be32 *p; unsigned int attrlen; - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); attrlen = ntohl(*p); - p = read_buf(xdr, attrlen << 2); + p = xdr_inline_decode(xdr, attrlen << 2); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); if (likely(attrlen > 0)) @@ -135,7 +125,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { __be32 *p; - p = read_buf(xdr, NFS4_STATEID_SIZE); + p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); memcpy(stateid->data, p, NFS4_STATEID_SIZE); @@ -156,7 +146,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ); if (unlikely(status != 0)) return status; - p = read_buf(xdr, 12); + p = xdr_inline_decode(xdr, 12); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); hdr->minorversion = ntohl(*p++); @@ -176,7 +166,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op) { __be32 *p; - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE_HDR); *op = ntohl(*p); @@ -205,7 +195,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, status = decode_delegation_stateid(xdr, &args->stateid); if (unlikely(status != 0)) return status; - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); args->truncate = ntohl(*p); @@ -227,7 +217,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, __be32 status = 0; uint32_t iomode; - p = read_buf(xdr, 4 * sizeof(uint32_t)); + p = xdr_inline_decode(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -245,14 +235,14 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, if (unlikely(status != 0)) return status; - p = read_buf(xdr, 2 * sizeof(uint64_t)); + p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t)); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); p = xdr_decode_hyper(p, &args->cbl_range.offset); p = xdr_decode_hyper(p, &args->cbl_range.length); return decode_layout_stateid(xdr, &args->cbl_stateid); } else if (args->cbl_recall_type == RETURN_FSID) { - p = read_buf(xdr, 2 * sizeof(uint64_t)); + p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t)); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); p = xdr_decode_hyper(p, &args->cbl_fsid.major); @@ -275,7 +265,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, args->ndevs = 0; /* Num of device notifications */ - p = read_buf(xdr, sizeof(uint32_t)); + p = xdr_inline_decode(xdr, sizeof(uint32_t)); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); goto out; @@ -298,7 +288,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, for (i = 0; i < n; i++) { struct cb_devicenotifyitem *dev = &args->devs[i]; - p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE); + p = xdr_inline_decode(xdr, (4 * sizeof(uint32_t)) + + NFS4_DEVICEID4_SIZE); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); goto err; @@ -329,7 +320,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) { - p = read_buf(xdr, sizeof(uint32_t)); + p = xdr_inline_decode(xdr, sizeof(uint32_t)); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); goto err; @@ -359,7 +350,7 @@ static __be32 decode_sessionid(struct xdr_stream *xdr, { __be32 *p; - p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN); + p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); @@ -379,13 +370,13 @@ static __be32 decode_rc_list(struct xdr_stream *xdr, goto out; status = htonl(NFS4ERR_RESOURCE); - p = read_buf(xdr, sizeof(uint32_t)); + p = xdr_inline_decode(xdr, sizeof(uint32_t)); if (unlikely(p == NULL)) goto out; rc_list->rcl_nrefcalls = ntohl(*p++); if (rc_list->rcl_nrefcalls) { - p = read_buf(xdr, + p = xdr_inline_decode(xdr, rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)); if (unlikely(p == NULL)) goto out; @@ -418,7 +409,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, if (status) return status; - p = read_buf(xdr, 5 * sizeof(uint32_t)); + p = xdr_inline_decode(xdr, 5 * sizeof(uint32_t)); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); @@ -461,7 +452,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, uint32_t bitmap[2]; __be32 *p, status; - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); args->craa_objs_to_keep = ntohl(*p++); @@ -480,7 +471,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, struct cb_recallslotargs *args = argp; __be32 *p; - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); args->crsa_target_highest_slotid = ntohl(*p++); @@ -492,14 +483,14 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg __be32 *p; unsigned int len; - p = read_buf(xdr, 12); + p = xdr_inline_decode(xdr, 12); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); p = xdr_decode_hyper(p, &args->cbnl_owner.clientid); len = be32_to_cpu(*p); - p = read_buf(xdr, len); + p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -537,7 +528,7 @@ static __be32 decode_write_response(struct xdr_stream *xdr, __be32 *p; /* skip the always zero field */ - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out; p++; @@ -577,7 +568,7 @@ static __be32 decode_offload_args(struct svc_rqst *rqstp, return status; /* decode status */ - p = read_buf(xdr, 4); + p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out; args->error = ntohl(*p++); @@ -943,10 +934,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) }; unsigned int nops = 0; - xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); + xdr_init_decode(&xdr_in, &rqstp->rq_arg, + rqstp->rq_arg.head[0].iov_base, NULL); p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - xdr_init_encode(&xdr_out, &rqstp->rq_res, p); + xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL); status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); if (status == htonl(NFS4ERR_RESOURCE)) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c @@ -229,6 +229,8 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation spin_lock(&delegation->lock); if (delegation->inode != NULL) inode = igrab(delegation->inode); + if (!inode) + set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); spin_unlock(&delegation->lock); return inode; } @@ -681,7 +683,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp) /** * nfs_super_return_all_delegations - return delegations for one superblock - * @sb: sb to process + * @server: pointer to nfs_server to process * */ void nfs_server_return_all_delegations(struct nfs_server *server) @@ -944,10 +946,11 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (test_bit(NFS_DELEGATION_RETURNING, - &delegation->flags)) - continue; - if (test_bit(NFS_DELEGATION_NEED_RECLAIM, + if (test_bit(NFS_DELEGATION_INODE_FREEING, + &delegation->flags) || + test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags) || + test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) @@ -1053,10 +1056,11 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (test_bit(NFS_DELEGATION_RETURNING, - &delegation->flags)) - continue; - if (test_bit(NFS_DELEGATION_TEST_EXPIRED, + if (test_bit(NFS_DELEGATION_INODE_FREEING, + &delegation->flags) || + test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags) || + test_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h @@ -34,6 +34,7 @@ enum { NFS_DELEGATION_RETURNING, NFS_DELEGATION_REVOKED, NFS_DELEGATION_TEST_EXPIRED, + NFS_DELEGATION_INODE_FREEING, }; int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c @@ -139,12 +139,19 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[0]; }; +struct readdirvec { + unsigned long nr; + unsigned long index; + struct page *pages[NFS_MAX_READDIR_RAPAGES]; +}; + typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool); typedef struct { struct file *file; struct page *page; struct dir_context *ctx; unsigned long page_index; + struct readdirvec pvec; u64 *dir_cookie; u64 last_cookie; loff_t current_index; @@ -524,6 +531,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct nfs_cache_array *array; unsigned int count = 0; int status; + int max_rapages = NFS_MAX_READDIR_RAPAGES; + + desc->pvec.index = desc->page_index; + desc->pvec.nr = 0; scratch = alloc_page(GFP_KERNEL); if (scratch == NULL) @@ -548,20 +559,40 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en if (desc->plus) nfs_prime_dcache(file_dentry(desc->file), entry); - status = nfs_readdir_add_to_array(entry, page); + status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]); + if (status == -ENOSPC) { + desc->pvec.nr++; + if (desc->pvec.nr == max_rapages) + break; + status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]); + } if (status != 0) break; } while (!entry->eof); + /* + * page and desc->pvec.pages[0] are valid, don't need to check + * whether or not to be NULL. + */ + copy_highpage(page, desc->pvec.pages[0]); + out_nopages: if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - array = kmap(page); + array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]); array->eof_index = array->size; status = 0; - kunmap(page); + kunmap_atomic(array); } put_page(scratch); + + /* + * desc->pvec.nr > 0 means at least one page was completely filled, + * we should return -ENOSPC. Otherwise function + * nfs_readdir_xdr_to_array will enter infinite loop. + */ + if (desc->pvec.nr > 0) + return -ENOSPC; return status; } @@ -574,8 +605,8 @@ void nfs_readdir_free_pages(struct page **pages, unsigned int npages) } /* - * nfs_readdir_large_page will allocate pages that must be freed with a call - * to nfs_readdir_free_pagearray + * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call + * to nfs_readdir_free_pages() */ static int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages) @@ -595,6 +626,24 @@ out_freepages: return -ENOMEM; } +/* + * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure. + */ +static +void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc) +{ + struct nfs_cache_array *array; + int max_rapages = NFS_MAX_READDIR_RAPAGES; + int index; + + for (index = 0; index < max_rapages; index++) { + array = kmap_atomic(desc->pvec.pages[index]); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); + } +} + static int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) { @@ -605,6 +654,12 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + /* + * This means we hit readdir rdpages miss, the preallocated rdpages + * are useless, the preallocate rdpages should be reinitialized. + */ + nfs_readdir_rapages_init(desc); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -664,9 +719,24 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) struct inode *inode = file_inode(desc->file); int ret; - ret = nfs_readdir_xdr_to_array(desc, page, inode); - if (ret < 0) - goto error; + /* + * If desc->page_index in range desc->pvec.index and + * desc->pvec.index + desc->pvec.nr, we get readdir cache hit. + */ + if (desc->page_index >= desc->pvec.index && + desc->page_index < (desc->pvec.index + desc->pvec.nr)) { + /* + * page and desc->pvec.pages[x] are valid, don't need to check + * whether or not to be NULL. + */ + copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]); + ret = 0; + } else { + ret = nfs_readdir_xdr_to_array(desc, page, inode); + if (ret < 0) + goto error; + } + SetPageUptodate(page); if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { @@ -831,6 +901,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) *desc = &my_desc; struct nfs_open_dir_context *dir_ctx = file->private_data; int res = 0; + int max_rapages = NFS_MAX_READDIR_RAPAGES; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", file, (long long)ctx->pos); @@ -850,6 +921,12 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx); + res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages); + if (res < 0) + return -ENOMEM; + + nfs_readdir_rapages_init(desc); + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) @@ -885,6 +962,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; } while (!desc->eof); out: + nfs_readdir_free_pages(desc->pvec.pages, max_rapages); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); @@ -945,7 +1023,7 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, /** * nfs_force_lookup_revalidate - Mark the directory as having changed - * @dir - pointer to directory inode + * @dir: pointer to directory inode * * This forces the revalidation code in nfs_lookup_revalidate() to do a * full lookup on all child dentries of 'dir' whenever a change occurs @@ -1649,7 +1727,7 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, reval_dentry: if (flags & LOOKUP_RCU) return -ECHILD; - return nfs_lookup_revalidate_dentry(dir, dentry, inode);; + return nfs_lookup_revalidate_dentry(dir, dentry, inode); full_reval: return nfs_do_lookup_revalidate(dir, dentry, flags); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c @@ -428,7 +428,7 @@ out_put: hdr->release(hdr); } -static void nfs_read_sync_pgio_error(struct list_head *head) +static void nfs_read_sync_pgio_error(struct list_head *head, int error) { struct nfs_page *req; @@ -664,8 +664,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { - nfs_list_remove_request(req); - nfs_list_add_request(req, &failed); + nfs_list_move_request(req, &failed); spin_lock(&cinfo.inode->i_lock); dreq->flags = 0; if (desc.pg_error < 0) @@ -821,7 +820,7 @@ out_put: hdr->release(hdr); } -static void nfs_write_sync_pgio_error(struct list_head *head) +static void nfs_write_sync_pgio_error(struct list_head *head, int error) { struct nfs_page *req; diff --git a/fs/nfs/file.c b/fs/nfs/file.c @@ -89,8 +89,8 @@ EXPORT_SYMBOL_GPL(nfs_file_release); /** * nfs_revalidate_size - Revalidate the file size - * @inode - pointer to inode struct - * @file - pointer to struct file + * @inode: pointer to inode struct + * @filp: pointer to struct file * * Revalidates the file length. This is basically a wrapper around * nfs_revalidate_inode() that takes into account the fact that we may @@ -276,6 +276,12 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync); * then a modify/write/read cycle when writing to a page in the * page cache. * + * Some pNFS layout drivers can only read/write at a certain block + * granularity like all block devices and therefore we must perform + * read/modify/write whenever a page hasn't read yet and the data + * to be written there is not aligned to a block boundary and/or + * smaller than the block size. + * * The modify/write/read cycle may occur if a page is read before * being completely filled by the writer. In this situation, the * page must be completely written to stable storage on the server @@ -291,26 +297,32 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync); * and that the new data won't completely replace the old data in * that range of the file. */ -static int nfs_want_read_modify_write(struct file *file, struct page *page, - loff_t pos, unsigned len) +static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len) { unsigned int pglen = nfs_page_length(page); unsigned int offset = pos & (PAGE_SIZE - 1); unsigned int end = offset + len; - if (pnfs_ld_read_whole_page(file->f_mapping->host)) { - if (!PageUptodate(page)) - return 1; - return 0; - } + return !pglen || (end >= pglen && !offset); +} - if ((file->f_mode & FMODE_READ) && /* open for read? */ - !PageUptodate(page) && /* Uptodate? */ - !PagePrivate(page) && /* i/o request already? */ - pglen && /* valid bytes of file? */ - (end < pglen || offset)) /* replace all valid bytes? */ - return 1; - return 0; +static bool nfs_want_read_modify_write(struct file *file, struct page *page, + loff_t pos, unsigned int len) +{ + /* + * Up-to-date pages, those with ongoing or full-page write + * don't need read/modify/write + */ + if (PageUptodate(page) || PagePrivate(page) || + nfs_full_page_write(page, pos, len)) + return false; + + if (pnfs_ld_read_whole_page(file->f_mapping->host)) + return true; + /* Open for reading too? */ + if (file->f_mode & FMODE_READ) + return true; + return false; } /* diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -410,7 +410,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, for (i = 0; i < fls->mirror_array_cnt; i++) { struct nfs4_ff_layout_mirror *mirror; struct cred *kcred; - const struct cred *cred; + const struct cred __rcu *cred; kuid_t uid; kgid_t gid; u32 ds_count, fh_count, id; @@ -501,7 +501,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; kcred->fsuid = uid; kcred->fsgid = gid; - cred = kcred; + cred = RCU_INITIALIZER(kcred); if (lgr->range.iomode == IOMODE_READ) rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); @@ -788,30 +788,82 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, } } +static void +ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) +{ + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + + if (devid) + nfs4_mark_deviceid_unavailable(devid); +} + +static void +ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) +{ + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + + if (devid) + nfs4_mark_deviceid_available(devid); +} + static struct nfs4_pnfs_ds * -ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, - int *best_idx) +ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx, + bool check_device) { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); + struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; bool fail_return = false; int idx; - /* mirrors are sorted by efficiency */ + /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { if (idx+1 == fls->mirror_array_cnt) - fail_return = true; - ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return); - if (ds) { - *best_idx = idx; - return ds; - } + fail_return = !check_device; + + mirror = FF_LAYOUT_COMP(lseg, idx); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return); + if (!ds) + continue; + + if (check_device && + nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) + continue; + + *best_idx = idx; + return ds; } return NULL; } +static struct nfs4_pnfs_ds * +ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) +{ + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); +} + +static struct nfs4_pnfs_ds * +ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) +{ + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); +} + +static struct nfs4_pnfs_ds * +ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) +{ + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); + if (ds) + return ds; + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); +} + static void ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, @@ -925,7 +977,8 @@ retry: goto out_mds; for (i = 0; i < pgio->pg_mirror_count; i++) { - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true); if (!ds) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; @@ -936,7 +989,6 @@ retry: goto retry; } pgm = &pgio->pg_mirrors[i]; - mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; } @@ -1071,6 +1123,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, break; case -NFS4ERR_RETRY_UNCACHED_REP: break; + case -EAGAIN: + return -NFS4ERR_RESET_TO_PNFS; /* Invalidate Layout errors */ case -NFS4ERR_PNFS_NO_LAYOUT: case -ESTALE: /* mapped NFS4ERR_STALE */ @@ -1131,6 +1185,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, case -EBADHANDLE: case -ELOOP: case -ENOSPC: + case -EAGAIN: break; case -EJUKEBOX: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); @@ -1158,8 +1213,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task, { int vers = clp->cl_nfs_mod->rpc_vers->number; - if (task->tk_status >= 0) + if (task->tk_status >= 0) { + ff_layout_mark_ds_reachable(lseg, idx); return 0; + } /* Handle the case of an invalid layout segment */ if (!pnfs_is_valid_lseg(lseg)) @@ -1222,6 +1279,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, offset, length, status, opnum, GFP_NOIO); + if (status == NFS4ERR_NXIO) + ff_layout_mark_ds_unreachable(lseg, idx); pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status); } @@ -1249,7 +1308,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, if (ff_layout_choose_best_ds_for_read(hdr->lseg, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) - goto out_eagain; + goto out_layouterror; set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: @@ -1260,6 +1319,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, } return 0; +out_layouterror: + ff_layout_send_layouterror(hdr->lseg); out_eagain: rpc_restart_call_prepare(task); return -EAGAIN; @@ -1293,15 +1354,6 @@ ff_layout_set_layoutcommit(struct inode *inode, (unsigned long long) NFS_I(inode)->layout->plh_lwb); } -static bool -ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx) -{ - /* No mirroring for now */ - struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx); - - return ff_layout_test_devid_unavailable(node); -} - static void ff_layout_read_record_layoutstats_start(struct rpc_task *task, struct nfs_pgio_header *hdr) { @@ -1332,10 +1384,6 @@ static int ff_layout_read_prepare_common(struct rpc_task *task, rpc_exit(task, -EIO); return -EIO; } - if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) { - rpc_exit(task, -EHOSTDOWN); - return -EAGAIN; - } ff_layout_read_record_layoutstats_start(task, hdr); return 0; @@ -1369,6 +1417,16 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) ff_layout_read_prepare_common(task, hdr); } +static void +ff_layout_io_prepare_transmit(struct rpc_task *task, + void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (!pnfs_is_valid_lseg(hdr->lseg)) + rpc_exit(task, -EAGAIN); +} + static void ff_layout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; @@ -1399,9 +1457,10 @@ static void ff_layout_read_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_read_record_layoutstats_done(&hdr->task, hdr); - if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) { + ff_layout_send_layouterror(hdr->lseg); pnfs_read_resend_pnfs(hdr); - else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) ff_layout_reset_read(hdr); pnfs_generic_rw_release(data); } @@ -1513,11 +1572,6 @@ static int ff_layout_write_prepare_common(struct rpc_task *task, return -EIO; } - if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) { - rpc_exit(task, -EHOSTDOWN); - return -EAGAIN; - } - ff_layout_write_record_layoutstats_start(task, hdr); return 0; } @@ -1573,9 +1627,10 @@ static void ff_layout_write_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_write_record_layoutstats_done(&hdr->task, hdr); - if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) { + ff_layout_send_layouterror(hdr->lseg); ff_layout_reset_write(hdr, true); - else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) ff_layout_reset_write(hdr, false); pnfs_generic_rw_release(data); } @@ -1657,6 +1712,7 @@ static void ff_layout_commit_release(void *data) static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { .rpc_call_prepare = ff_layout_read_prepare_v3, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, .rpc_release = ff_layout_read_release, @@ -1664,6 +1720,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { .rpc_call_prepare = ff_layout_read_prepare_v4, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, .rpc_release = ff_layout_read_release, @@ -1671,6 +1728,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { .rpc_call_prepare = ff_layout_write_prepare_v3, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, .rpc_release = ff_layout_write_release, @@ -1678,6 +1736,7 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { .rpc_call_prepare = ff_layout_write_prepare_v4, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, .rpc_release = ff_layout_write_release, @@ -1703,6 +1762,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; u32 idx = hdr->pgio_mirror_idx; @@ -1713,20 +1773,21 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) __func__, hdr->inode->i_ino, hdr->args.pgbase, (size_t)hdr->args.count, offset); - ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); + mirror = FF_LAYOUT_COMP(lseg, idx); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); if (!ds) goto out_failed; - ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, + ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); if (IS_ERR(ds_clnt)) goto out_failed; - ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred); if (!ds_cred) goto out_failed; - vers = nfs4_ff_layout_ds_version(lseg, idx); + vers = nfs4_ff_layout_ds_version(mirror); dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); @@ -1734,13 +1795,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->pgio_done_cb = ff_layout_read_done_cb; refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; - fh = nfs4_ff_layout_select_ds_fh(lseg, idx); + fh = nfs4_ff_layout_select_ds_fh(mirror); if (fh) hdr->args.fh = fh; - if (vers == 4 && - !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) - goto out_failed; + nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid); /* * Note that if we ever decide to split across DSes, @@ -1770,26 +1829,28 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; int vers; struct nfs_fh *fh; int idx = hdr->pgio_mirror_idx; - ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); + mirror = FF_LAYOUT_COMP(lseg, idx); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); if (!ds) goto out_failed; - ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, + ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); if (IS_ERR(ds_clnt)) goto out_failed; - ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred); if (!ds_cred) goto out_failed; - vers = nfs4_ff_layout_ds_version(lseg, idx); + vers = nfs4_ff_layout_ds_version(mirror); dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, @@ -1800,13 +1861,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; - fh = nfs4_ff_layout_select_ds_fh(lseg, idx); + fh = nfs4_ff_layout_select_ds_fh(mirror); if (fh) hdr->args.fh = fh; - if (vers == 4 && - !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) - goto out_failed; + nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid); /* * Note that if we ever decide to split across DSes, @@ -1849,6 +1908,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; u32 idx; int vers, ret; @@ -1859,20 +1919,21 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) goto out_err; idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); - ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); + mirror = FF_LAYOUT_COMP(lseg, idx); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); if (!ds) goto out_err; - ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, + ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, data->inode); if (IS_ERR(ds_clnt)) goto out_err; - ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred); + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred); if (!ds_cred) goto out_err; - vers = nfs4_ff_layout_ds_version(lseg, idx); + vers = nfs4_ff_layout_ds_version(mirror); dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count), @@ -2036,7 +2097,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, dprintk("%s: Begin\n", __func__); - xdr_init_encode(&tmp_xdr, &tmp_buf, NULL); + xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL); ff_layout_encode_ioerr(&tmp_xdr, args, ff_args); ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args); @@ -2102,6 +2163,52 @@ out_nomem: return -ENOMEM; } +#ifdef CONFIG_NFS_V4_2 +void +ff_layout_send_layouterror(struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_hdr *lo = lseg->pls_layout; + struct nfs42_layout_error *errors; + LIST_HEAD(head); + + if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR)) + return; + ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1); + if (list_empty(&head)) + return; + + errors = kmalloc_array(NFS42_LAYOUTERROR_MAX, + sizeof(*errors), GFP_NOFS); + if (errors != NULL) { + const struct nfs4_ff_layout_ds_err *pos; + size_t n = 0; + + list_for_each_entry(pos, &head, list) { + errors[n].offset = pos->offset; + errors[n].length = pos->length; + nfs4_stateid_copy(&errors[n].stateid, &pos->stateid); + errors[n].errors[0].dev_id = pos->deviceid; + errors[n].errors[0].status = pos->status; + errors[n].errors[0].opnum = pos->opnum; + n++; + if (!list_is_last(&pos->list, &head) && + n < NFS42_LAYOUTERROR_MAX) + continue; + if (nfs42_proc_layouterror(lseg, errors, n) < 0) + break; + n = 0; + } + kfree(errors); + } + ff_layout_free_ds_ioerr(&head); +} +#else +void +ff_layout_send_layouterror(struct pnfs_layout_segment *lseg) +{ +} +#endif + static int ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -132,16 +132,6 @@ FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } -static inline struct nfs4_deviceid_node * -FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) -{ - if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || - FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || - FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) - return NULL; - return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; -} - static inline struct nfs4_ff_layout_ds * FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) { @@ -151,9 +141,25 @@ FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) static inline struct nfs4_ff_layout_mirror * FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) { - if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) - return NULL; - return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); + + if (idx < fls->mirror_array_cnt) + return fls->mirror_array[idx]; + return NULL; +} + +static inline struct nfs4_deviceid_node * +FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx); + + if (mirror != NULL) { + struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds; + + if (!IS_ERR_OR_NULL(mirror_ds)) + return &mirror_ds->id_node; + } + return NULL; } static inline u32 @@ -174,28 +180,10 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg) return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_READ_IO; } -static inline bool -ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) -{ - /* - * Flexfiles should never mark a DS unavailable, but if it does - * print a (ratelimited) warning as this can affect performance. - */ - if (nfs4_test_deviceid_unavailable(node)) { - u32 *p = (u32 *)node->deviceid.data; - - pr_warn_ratelimited("NFS: flexfiles layout referencing an " - "unavailable device [%x%x%x%x]\n", - p[0], p[1], p[2], p[3]); - return true; - } - return false; -} - static inline int -nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx) +nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror) { - return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version; + return mirror->mirror_ds->ds_versions[0].version; } struct nfs4_ff_layout_ds * @@ -207,6 +195,7 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, struct nfs4_ff_layout_mirror *mirror, u64 offset, u64 length, int status, enum nfs_opnum4 opnum, gfp_t gfp_flags); +void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg); int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head); void ff_layout_free_ds_ioerr(struct list_head *head); unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, @@ -214,23 +203,23 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, struct list_head *head, unsigned int maxnum); struct nfs_fh * -nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); -int -nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, - u32 mirror_idx, - nfs4_stateid *stateid); +nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror); +void +nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, + nfs4_stateid *stateid); struct nfs4_pnfs_ds * -nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, +nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, + struct nfs4_ff_layout_mirror *mirror, bool fail_return); struct rpc_clnt * -nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, - u32 ds_idx, +nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, struct nfs_client *ds_clp, struct inode *inode); -const struct cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, - u32 ds_idx, const struct cred *mdscred); +const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, + const struct pnfs_layout_range *range, + const struct cred *mdscred); bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -183,56 +183,6 @@ out_err: return NULL; } -static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, - struct nfs4_deviceid_node *devid) -{ - nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); - if (!ff_layout_has_available_ds(lseg)) - pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, - lseg); -} - -static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, - struct nfs4_ff_layout_mirror *mirror, - bool create) -{ - if (mirror == NULL || IS_ERR(mirror->mirror_ds)) - goto outerr; - if (mirror->mirror_ds == NULL) { - if (create) { - struct nfs4_deviceid_node *node; - struct pnfs_layout_hdr *lh = lseg->pls_layout; - struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); - - node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), - &mirror->devid, lh->plh_lc_cred, - GFP_KERNEL); - if (node) - mirror_ds = FF_LAYOUT_MIRROR_DS(node); - - /* check for race with another call to this function */ - if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && - mirror_ds != ERR_PTR(-ENODEV)) - nfs4_put_deviceid_node(node); - } else - goto outerr; - } - - if (IS_ERR(mirror->mirror_ds)) - goto outerr; - - if (mirror->mirror_ds->ds == NULL) { - struct nfs4_deviceid_node *devid; - devid = &mirror->mirror_ds->id_node; - ff_layout_mark_devid_invalid(lseg, devid); - return false; - } - return true; -outerr: - pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); - return false; -} - static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, u64 offset, u64 length) { @@ -326,7 +276,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, spin_lock(&flo->generic_hdr.plh_inode->i_lock); ff_layout_add_ds_error_locked(flo, dserr); spin_unlock(&flo->generic_hdr.plh_inode->i_lock); - return 0; } @@ -353,46 +302,54 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) } struct nfs_fh * -nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) +nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror) { - struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); - struct nfs_fh *fh = NULL; - - if (!ff_layout_mirror_valid(lseg, mirror, false)) { - pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", - __func__, mirror_idx); - goto out; - } - /* FIXME: For now assume there is only 1 version available for the DS */ - fh = &mirror->fh_versions[0]; -out: - return fh; + return &mirror->fh_versions[0]; } -int -nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, - u32 mirror_idx, - nfs4_stateid *stateid) +void +nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, + nfs4_stateid *stateid) { - struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); + if (nfs4_ff_layout_ds_version(mirror) == 4) + nfs4_stateid_copy(stateid, &mirror->stateid); +} - if (!ff_layout_mirror_valid(lseg, mirror, false)) { - pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", - __func__, mirror_idx); - goto out; +static bool +ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo, + struct nfs4_ff_layout_mirror *mirror) +{ + if (mirror == NULL) + goto outerr; + if (mirror->mirror_ds == NULL) { + struct nfs4_deviceid_node *node; + struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); + + node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), + &mirror->devid, lo->plh_lc_cred, + GFP_KERNEL); + if (node) + mirror_ds = FF_LAYOUT_MIRROR_DS(node); + + /* check for race with another call to this function */ + if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && + mirror_ds != ERR_PTR(-ENODEV)) + nfs4_put_deviceid_node(node); } - nfs4_stateid_copy(stateid, &mirror->stateid); - return 1; -out: - return 0; + if (IS_ERR(mirror->mirror_ds)) + goto outerr; + + return true; +outerr: + return false; } /** * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call * @lseg: the layout segment we're operating on - * @ds_idx: index of the DS to use + * @mirror: layout mirror describing the DS to use * @fail_return: return layout on connect failure? * * Try to prepare a DS connection to accept an RPC call. This involves @@ -407,26 +364,18 @@ out: * Returns a pointer to a connected DS object on success or NULL on failure. */ struct nfs4_pnfs_ds * -nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, +nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, + struct nfs4_ff_layout_mirror *mirror, bool fail_return) { - struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); struct nfs4_pnfs_ds *ds = NULL; - struct nfs4_deviceid_node *devid; struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; int status; - if (!ff_layout_mirror_valid(lseg, mirror, true)) { - pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", - __func__, ds_idx); - goto out; - } - - devid = &mirror->mirror_ds->id_node; - if (ff_layout_test_devid_unavailable(devid)) - goto out_fail; + if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) + goto noconnect; ds = mirror->mirror_ds->ds; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ @@ -437,8 +386,8 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. */ - status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, - dataserver_retrans, + status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node, + dataserver_timeo, dataserver_retrans, mirror->mirror_ds->ds_versions[0].version, mirror->mirror_ds->ds_versions[0].minor_version); @@ -453,11 +402,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].wsize = max_payload; goto out; } -out_fail: +noconnect: ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); + ff_layout_send_layouterror(lseg); if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); ds = NULL; @@ -466,14 +416,14 @@ out: } const struct cred * -ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, +ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, + const struct pnfs_layout_range *range, const struct cred *mdscred) { - struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); const struct cred *cred; if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { - cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); + cred = ff_layout_get_mirror_cred(mirror, range->iomode); if (!cred) cred = get_cred(mdscred); } else { @@ -483,15 +433,18 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, } /** -* Find or create a DS rpc client with th MDS server rpc client auth flavor -* in the nfs_client cl_ds_clients list. -*/ + * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client + * @mirror: pointer to the mirror + * @ds_clp: nfs_client for the DS + * @inode: pointer to inode + * + * Find or create a DS rpc client with th MDS server rpc client auth flavor + * in the nfs_client cl_ds_clients list. + */ struct rpc_clnt * -nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, +nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, struct nfs_client *ds_clp, struct inode *inode) { - struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); - switch (mirror->mirror_ds->ds_versions[0].version) { case 3: /* For NFSv3 DS, flavor is set when creating DS connections */ @@ -608,7 +561,7 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) if (IS_ERR(mirror->mirror_ds)) continue; devid = &mirror->mirror_ds->id_node; - if (!ff_layout_test_devid_unavailable(devid)) + if (!nfs4_test_deviceid_unavailable(devid)) return true; } } @@ -629,7 +582,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) if (!mirror->mirror_ds) continue; devid = &mirror->mirror_ds->id_node; - if (ff_layout_test_devid_unavailable(devid)) + if (nfs4_test_deviceid_unavailable(devid)) return false; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c @@ -143,6 +143,7 @@ EXPORT_SYMBOL_GPL(nfs_sync_inode); /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk + * @mapping: pointer to struct address_space */ int nfs_sync_mapping(struct address_space *mapping) { @@ -1184,8 +1185,8 @@ int nfs_attribute_cache_expired(struct inode *inode) /** * nfs_revalidate_inode - Revalidate the inode attributes - * @server - pointer to nfs_server struct - * @inode - pointer to inode struct + * @server: pointer to nfs_server struct + * @inode: pointer to inode struct * * Updates inode attribute information by retrieving the data from the server. */ @@ -1255,8 +1256,8 @@ out: /** * nfs_revalidate_mapping - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping + * @inode: pointer to host inode + * @mapping: pointer to mapping */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) @@ -1371,8 +1372,8 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) /** * nfs_check_inode_attributes - verify consistency of the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * Verifies the attribute cache. If we have just changed the attributes, * so that fattr carries weak cache consistency data, then it may @@ -1572,8 +1573,8 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle); /** * nfs_inode_attrs_need_update - check if the inode attributes need updating - * @inode - pointer to inode - * @fattr - attributes + * @inode: pointer to inode + * @fattr: attributes * * Attempt to divine whether or not an RPC call reply carrying stale * attributes got scheduled after another call carrying updated ones. @@ -1614,8 +1615,8 @@ static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr /** * nfs_refresh_inode - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * Check that an RPC call that returned attributes has not overlapped with * other recent updates of the inode metadata, then decide whether it is @@ -1649,8 +1650,8 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, /** * nfs_post_op_update_inode - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. @@ -1679,8 +1680,8 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. Fake up @@ -1731,8 +1732,8 @@ out_noforce: /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. Fake up diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h @@ -69,7 +69,8 @@ struct nfs_clone_mount { * Maximum number of pages that readdir can use for creating * a vmapped array of pages. */ -#define NFS_MAX_READDIR_PAGES 8 +#define NFS_MAX_READDIR_PAGES 64 +#define NFS_MAX_READDIR_RAPAGES 8 struct nfs_client_initdata { unsigned long init_flags; @@ -755,6 +756,7 @@ static inline bool nfs_error_is_fatal(int err) { switch (err) { case -ERESTARTSYS: + case -EINTR: case -EACCES: case -EDQUOT: case -EFBIG: @@ -763,6 +765,7 @@ static inline bool nfs_error_is_fatal(int err) case -EROFS: case -ESTALE: case -E2BIG: + case -ENOMEM: return true; default: return false; diff --git a/fs/nfs/io.c b/fs/nfs/io.c @@ -25,7 +25,7 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) /** * nfs_start_io_read - declare the file is being used for buffered reads - * @inode - file inode + * @inode: file inode * * Declare that a buffered read operation is about to start, and ensure * that we block all direct I/O. @@ -56,7 +56,7 @@ nfs_start_io_read(struct inode *inode) /** * nfs_end_io_read - declare that the buffered read operation is done - * @inode - file inode + * @inode: file inode * * Declare that a buffered read operation is done, and release the shared * lock on inode->i_rwsem. @@ -69,7 +69,7 @@ nfs_end_io_read(struct inode *inode) /** * nfs_start_io_write - declare the file is being used for buffered writes - * @inode - file inode + * @inode: file inode * * Declare that a buffered read operation is about to start, and ensure * that we block all direct I/O. @@ -83,7 +83,7 @@ nfs_start_io_write(struct inode *inode) /** * nfs_end_io_write - declare that the buffered write operation is done - * @inode - file inode + * @inode: file inode * * Declare that a buffered write operation is done, and release the * lock on inode->i_rwsem. @@ -105,7 +105,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) /** * nfs_end_io_direct - declare the file is being used for direct i/o - * @inode - file inode + * @inode: file inode * * Declare that a direct I/O operation is about to start, and ensure * that we block all buffered I/O. @@ -136,7 +136,7 @@ nfs_start_io_direct(struct inode *inode) /** * nfs_end_io_direct - declare that the direct i/o operation is done - * @inode - file inode + * @inode: file inode * * Declare that a direct I/O operation is done, and release the shared * lock on inode->i_rwsem. diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c @@ -221,10 +221,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, /** * nfs_do_submount - set up mountpoint when crossing a filesystem boundary - * @dentry - parent directory - * @fh - filehandle for new root dentry - * @fattr - attributes for new root inode - * @authflavor - security flavor to use when performing the mount + * @dentry: parent directory + * @fh: filehandle for new root dentry + * @fattr: attributes for new root inode + * @authflavor: security flavor to use when performing the mount * */ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c @@ -22,6 +22,7 @@ #include <linux/nfs.h> #include <linux/nfs2.h> #include <linux/nfs_fs.h> +#include "nfstrace.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -55,42 +56,16 @@ #define NFS_attrstat_sz (1+NFS_fattr_sz) #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) -#define NFS_readlinkres_sz (2) -#define NFS_readres_sz (1+NFS_fattr_sz+1) +#define NFS_readlinkres_sz (2+1) +#define NFS_readres_sz (1+NFS_fattr_sz+1+1) #define NFS_writeres_sz (NFS_attrstat_sz) #define NFS_stat_sz (1) -#define NFS_readdirres_sz (1) +#define NFS_readdirres_sz (1+1) #define NFS_statfsres_sz (1+NFS_info_sz) static int nfs_stat_to_errno(enum nfs_stat); /* - * While encoding arguments, set up the reply buffer in advance to - * receive reply data directly into the page cache. - */ -static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages, - unsigned int base, unsigned int len, - unsigned int bufsize) -{ - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - - replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize; - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len); -} - -/* - * Handle decode buffer overflows out-of-line. - */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("NFS: %s prematurely hit the end of our receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} - - -/* * Encode/decode NFSv2 basic data types * * Basic NFSv2 data types are defined in section 2.3 of RFC 1094: @@ -110,8 +85,8 @@ static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; count = be32_to_cpup(p); recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) @@ -125,9 +100,6 @@ out_cheating: "count %u > recvd %u\n", count, recvd); count = recvd; goto out; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -157,13 +129,16 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; + if (unlikely(*p != cpu_to_be32(NFS_OK))) + goto out_status; + *status = 0; + return 0; +out_status: *status = be32_to_cpup(p); + trace_nfs_xdr_status((int)*status); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -205,14 +180,11 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh) __be32 *p; p = xdr_inline_decode(xdr, NFS2_FHSIZE); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; fh->size = NFS2_FHSIZE; memcpy(fh->data, p, NFS2_FHSIZE); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -282,8 +254,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr) __be32 *p; p = xdr_inline_decode(xdr, NFS_fattr_sz << 2); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; fattr->valid |= NFS_ATTR_FATTR_V2; @@ -325,9 +297,6 @@ out_uid: out_gid: dprintk("NFS: returned invalid gid\n"); return -EINVAL; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -416,23 +385,20 @@ static int decode_filename_inline(struct xdr_stream *xdr, u32 count; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; count = be32_to_cpup(p); if (count > NFS3_MAXNAMLEN) goto out_nametoolong; p = xdr_inline_decode(xdr, count); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; *name = (const char *)p; *length = count; return 0; out_nametoolong: dprintk("NFS: returned filename too long: %u\n", count); return -ENAMETOOLONG; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -455,8 +421,8 @@ static int decode_path(struct xdr_stream *xdr) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; length = be32_to_cpup(p); if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN)) goto out_size; @@ -472,9 +438,6 @@ out_cheating: dprintk("NFS: server cheating in pathname result: " "length %u > received %u\n", length, recvd); return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -615,8 +578,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, const struct nfs_readlinkargs *args = data; encode_fhandle(xdr, args->fh); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->pglen, NFS_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->pglen, NFS_readlinkres_sz); } /* @@ -651,8 +614,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, const struct nfs_pgio_args *args = data; encode_readargs(xdr, args); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->count, NFS_readres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, NFS_readres_sz); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -809,8 +772,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, const struct nfs_readdirargs *args = data; encode_readdirargs(xdr, args); - prepare_reply_buffer(req, args->pages, 0, - args->count, NFS_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, + args->count, NFS_readdirres_sz); } /* @@ -951,12 +914,12 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int error; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; if (*p++ == xdr_zero) { p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; if (*p++ == xdr_zero) return -EAGAIN; entry->eof = 1; @@ -964,8 +927,8 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, } p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; entry->ino = be32_to_cpup(p); error = decode_filename_inline(xdr, &entry->name, &entry->len); @@ -978,17 +941,13 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, */ entry->prev_cookie = entry->cookie; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; entry->cookie = be32_to_cpup(p); entry->d_type = DT_UNKNOWN; return 0; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EAGAIN; } /* @@ -1052,17 +1011,14 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result) __be32 *p; p = xdr_inline_decode(xdr, NFS_info_sz << 2); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; result->tsize = be32_to_cpup(p++); result->bsize = be32_to_cpup(p++); result->blocks = be32_to_cpup(p++); result->bfree = be32_to_cpup(p++); result->bavail = be32_to_cpup(p); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr, diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c @@ -222,8 +222,6 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, switch (status) { case 0: status = nfs_refresh_inode(inode, fattr); - set_cached_acl(inode, ACL_TYPE_ACCESS, acl); - set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include <linux/nfs3.h> #include <linux/nfs_fs.h> #include <linux/nfsacl.h> +#include "nfstrace.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -68,13 +69,13 @@ #define NFS3_removeres_sz (NFS3_setattrres_sz) #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1) #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) @@ -84,7 +85,7 @@ #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \ - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) static int nfs3_stat_to_errno(enum nfs_stat); @@ -104,32 +105,6 @@ static const umode_t nfs_type2fmt[] = { }; /* - * While encoding arguments, set up the reply buffer in advance to - * receive reply data directly into the page cache. - */ -static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages, - unsigned int base, unsigned int len, - unsigned int bufsize) -{ - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - - replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize; - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len); -} - -/* - * Handle decode buffer overflows out-of-line. - */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("NFS: %s prematurely hit the end of our receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} - - -/* * Encode/decode NFSv3 basic data types * * Basic NFSv3 data types are defined in section 2.5 of RFC 1813: @@ -151,13 +126,10 @@ static int decode_uint32(struct xdr_stream *xdr, u32 *value) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; *value = be32_to_cpup(p); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_uint64(struct xdr_stream *xdr, u64 *value) @@ -165,13 +137,10 @@ static int decode_uint64(struct xdr_stream *xdr, u64 *value) __be32 *p; p = xdr_inline_decode(xdr, 8); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; xdr_decode_hyper(p, value); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -211,14 +180,14 @@ static int decode_inline_filename3(struct xdr_stream *xdr, u32 count; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; count = be32_to_cpup(p); if (count > NFS3_MAXNAMLEN) goto out_nametoolong; p = xdr_inline_decode(xdr, count); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; *name = (const char *)p; *length = count; return 0; @@ -226,9 +195,6 @@ static int decode_inline_filename3(struct xdr_stream *xdr, out_nametoolong: dprintk("NFS: returned filename too long: %u\n", count); return -ENAMETOOLONG; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -249,8 +215,8 @@ static int decode_nfspath3(struct xdr_stream *xdr) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; count = be32_to_cpup(p); if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN)) goto out_nametoolong; @@ -267,9 +233,6 @@ out_cheating: dprintk("NFS: server cheating in pathname result: " "count %u > recvd %u\n", count, recvd); return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -303,13 +266,10 @@ static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier) __be32 *p; p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; memcpy(verifier, p, NFS3_COOKIEVERFSIZE); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -330,13 +290,10 @@ static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier * __be32 *p; p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; memcpy(verifier->data, p, NFS3_WRITEVERFSIZE); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -364,13 +321,16 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; + if (unlikely(*p != cpu_to_be32(NFS3_OK))) + goto out_status; + *status = 0; + return 0; +out_status: *status = be32_to_cpup(p); + trace_nfs_xdr_status((int)*status); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -453,23 +413,20 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; length = be32_to_cpup(p++); if (unlikely(length > NFS3_FHSIZE)) goto out_toobig; p = xdr_inline_decode(xdr, length); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; fh->size = length; memcpy(fh->data, p, length); return 0; out_toobig: dprintk("NFS: file handle size (%u) too big\n", length); return -E2BIG; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static void zero_nfs_fh3(struct nfs_fh *fh) @@ -655,8 +612,8 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr) __be32 *p; p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; p = xdr_decode_ftype3(p, &fmode); @@ -690,9 +647,6 @@ out_uid: out_gid: dprintk("NFS: returned invalid gid\n"); return -EINVAL; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -710,14 +664,11 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; if (*p != xdr_zero) return decode_fattr3(xdr, fattr); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -733,8 +684,8 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) __be32 *p; p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; fattr->valid |= NFS_ATTR_FATTR_PRESIZE | NFS_ATTR_FATTR_PRECHANGE @@ -747,9 +698,6 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -773,14 +721,11 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) __be32 *p; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; if (*p != xdr_zero) return decode_wcc_attr(xdr, fattr); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr) @@ -808,15 +753,12 @@ out: static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh) { __be32 *p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; if (*p != xdr_zero) return decode_nfs_fh3(xdr, fh); zero_nfs_fh3(fh); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -953,8 +895,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, const struct nfs3_readlinkargs *args = data; encode_nfs_fh3(xdr, args->fh); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->pglen, NFS3_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->pglen, NFS3_readlinkres_sz); } /* @@ -986,8 +928,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, unsigned int replen = args->replen ? args->replen : NFS3_readres_sz; encode_read3args(xdr, args); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->count, replen); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, replen); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -1279,7 +1221,7 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdir3args(xdr, args); - prepare_reply_buffer(req, args->pages, 0, + rpc_prepare_reply_pages(req, args->pages, 0, args->count, NFS3_readdirres_sz); } @@ -1321,7 +1263,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdirplus3args(xdr, args); - prepare_reply_buffer(req, args->pages, 0, + rpc_prepare_reply_pages(req, args->pages, 0, args->count, NFS3_readdirres_sz); } @@ -1366,7 +1308,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, encode_nfs_fh3(xdr, args->fh); encode_uint32(xdr, args->mask); if (args->mask & (NFS_ACL | NFS_DFACL)) { - prepare_reply_buffer(req, args->pages, 0, + rpc_prepare_reply_pages(req, args->pages, 0, NFSACL_MAXPAGES << PAGE_SHIFT, ACL3_getaclres_sz); req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; @@ -1643,8 +1585,8 @@ static int decode_read3resok(struct xdr_stream *xdr, __be32 *p; p = xdr_inline_decode(xdr, 4 + 4 + 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; count = be32_to_cpup(p++); eof = be32_to_cpup(p++); ocount = be32_to_cpup(p++); @@ -1667,9 +1609,6 @@ out_cheating: count = recvd; eof = 0; goto out; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -1690,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, result->op_status = status; if (status != NFS3_OK) goto out_status; - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2); + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2); error = decode_read3resok(xdr, result); out: return error; @@ -1731,22 +1670,18 @@ static int decode_write3resok(struct xdr_stream *xdr, __be32 *p; p = xdr_inline_decode(xdr, 4 + 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; result->count = be32_to_cpup(p++); result->verf->committed = be32_to_cpup(p++); if (unlikely(result->verf->committed > NFS_FILE_SYNC)) goto out_badvalue; if (decode_writeverf3(xdr, &result->verf->verifier)) - goto out_eio; + return -EIO; return result->count; out_badvalue: dprintk("NFS: bad stable_how value: %u\n", result->verf->committed); return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); -out_eio: - return -EIO; } static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -2010,12 +1945,12 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, u64 new_cookie; p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; if (*p == xdr_zero) { p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; if (*p == xdr_zero) return -EAGAIN; entry->eof = 1; @@ -2051,8 +1986,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, /* In fact, a post_op_fh3: */ p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EAGAIN; if (*p != xdr_zero) { error = decode_nfs_fh3(xdr, entry->fh); if (unlikely(error)) { @@ -2069,9 +2004,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EAGAIN; out_truncated: dprintk("NFS: directory entry contains invalid file handle\n"); *entry = old; @@ -2183,8 +2115,8 @@ static int decode_fsstat3resok(struct xdr_stream *xdr, __be32 *p; p = xdr_inline_decode(xdr, 8 * 6 + 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; p = xdr_decode_size3(p, &result->tbytes); p = xdr_decode_size3(p, &result->fbytes); p = xdr_decode_size3(p, &result->abytes); @@ -2193,9 +2125,6 @@ static int decode_fsstat3resok(struct xdr_stream *xdr, xdr_decode_size3(p, &result->afiles); /* ignore invarsec */ return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, @@ -2255,8 +2184,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, __be32 *p; p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; result->rtmax = be32_to_cpup(p++); result->rtpref = be32_to_cpup(p++); result->rtmult = be32_to_cpup(p++); @@ -2270,9 +2199,6 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, /* ignore properties */ result->lease_time = 0; return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, @@ -2328,15 +2254,12 @@ static int decode_pathconf3resok(struct xdr_stream *xdr, __be32 *p; p = xdr_inline_decode(xdr, 4 * 6); - if (unlikely(p == NULL)) - goto out_overflow; + if (unlikely(!p)) + return -EIO; result->max_link = be32_to_cpup(p++); result->max_namelen = be32_to_cpup(p); /* ignore remaining fields */ return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h @@ -20,5 +20,8 @@ loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); +int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, + const struct nfs42_layout_error *errors, + size_t n); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c @@ -672,6 +672,170 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return 0; } +static struct nfs42_layouterror_data * +nfs42_alloc_layouterror_data(struct pnfs_layout_segment *lseg, gfp_t gfp_flags) +{ + struct nfs42_layouterror_data *data; + struct inode *inode = lseg->pls_layout->plh_inode; + + data = kzalloc(sizeof(*data), gfp_flags); + if (data) { + data->args.inode = data->inode = nfs_igrab_and_active(inode); + if (data->inode) { + data->lseg = pnfs_get_lseg(lseg); + if (data->lseg) + return data; + nfs_iput_and_deactive(data->inode); + } + kfree(data); + } + return NULL; +} + +static void +nfs42_free_layouterror_data(struct nfs42_layouterror_data *data) +{ + pnfs_put_lseg(data->lseg); + nfs_iput_and_deactive(data->inode); + kfree(data); +} + +static void +nfs42_layouterror_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layouterror_data *data = calldata; + struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layout_hdr *lo = data->lseg->pls_layout; + unsigned i; + + spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + rpc_exit(task, 0); + return; + } + for (i = 0; i < data->args.num_errors; i++) + nfs4_stateid_copy(&data->args.errors[i].stateid, + &lo->plh_stateid); + spin_unlock(&inode->i_lock); + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layouterror_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layouterror_data *data = calldata; + struct inode *inode = data->inode; + struct pnfs_layout_hdr *lo = data->lseg->pls_layout; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_BADHANDLE: + case -ESTALE: + pnfs_destroy_layout(NFS_I(inode)); + break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match(&data->args.errors[0].stateid, + &lo->plh_stateid)) { + LIST_HEAD(head); + + /* + * Mark the bad layout state as invalid, then retry + * with the current stateid. + */ + pnfs_mark_layout_stateid_invalid(lo, &head); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&head); + nfs_commit_inode(inode, 0); + } else + spin_unlock(&inode->i_lock); + break; + case -NFS4ERR_OLD_STATEID: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&data->args.errors[0].stateid, + &lo->plh_stateid)) { + /* Do we need to delay before resending? */ + if (!nfs4_stateid_is_newer(&lo->plh_stateid, + &data->args.errors[0].stateid)) + rpc_delay(task, HZ); + rpc_restart_call_prepare(task); + } + spin_unlock(&inode->i_lock); + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR; + } +} + +static void +nfs42_layouterror_release(void *calldata) +{ + struct nfs42_layouterror_data *data = calldata; + + nfs42_free_layouterror_data(data); +} + +static const struct rpc_call_ops nfs42_layouterror_ops = { + .rpc_call_prepare = nfs42_layouterror_prepare, + .rpc_call_done = nfs42_layouterror_done, + .rpc_release = nfs42_layouterror_release, +}; + +int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, + const struct nfs42_layout_error *errors, size_t n) +{ + struct inode *inode = lseg->pls_layout->plh_inode; + struct nfs42_layouterror_data *data; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTERROR], + }; + struct rpc_task_setup task_setup = { + .rpc_message = &msg, + .callback_ops = &nfs42_layouterror_ops, + .flags = RPC_TASK_ASYNC, + }; + unsigned int i; + + if (!nfs_server_capable(inode, NFS_CAP_LAYOUTERROR)) + return -EOPNOTSUPP; + if (n > NFS42_LAYOUTERROR_MAX) + return -EINVAL; + data = nfs42_alloc_layouterror_data(lseg, GFP_NOFS); + if (!data) + return -ENOMEM; + for (i = 0; i < n; i++) { + data->args.errors[i] = errors[i]; + data->args.num_errors++; + data->res.num_errors++; + } + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + task_setup.callback_data = data; + task_setup.rpc_client = NFS_SERVER(inode)->client; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} +EXPORT_SYMBOL_GPL(nfs42_proc_layouterror); + static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, struct file *dst_f, struct nfs_lock_context *src_lock, struct nfs_lock_context *dst_lock, loff_t src_offset, diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c @@ -51,6 +51,15 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_device_error_maxsz (XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \ + 1 /* status */ + 1 /* opnum */) +#define encode_layouterror_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + 1 /* Array size */ + \ + encode_device_error_maxsz) +#define decode_layouterror_maxsz (op_decode_hdr_maxsz) #define encode_clone_maxsz (encode_stateid_maxsz + \ encode_stateid_maxsz + \ 2 /* src offset */ + \ @@ -59,43 +68,53 @@ #define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_allocate_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_allocate_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ encode_copy_maxsz + \ encode_commit_maxsz) #define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ decode_copy_maxsz + \ decode_commit_maxsz) #define NFS4_enc_offload_cancel_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_offload_cancel_maxsz) #define NFS4_dec_offload_cancel_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_deallocate_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_deallocate_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) #define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ @@ -106,6 +125,16 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) +#define NFS4_enc_layouterror_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + NFS42_LAYOUTERROR_MAX * \ + encode_layouterror_maxsz) +#define NFS4_dec_layouterror_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + NFS42_LAYOUTERROR_MAX * \ + decode_layouterror_maxsz) #define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -223,6 +252,34 @@ static void encode_clone(struct xdr_stream *xdr, xdr_encode_hyper(p, args->count); } +static void encode_device_error(struct xdr_stream *xdr, + const struct nfs42_device_error *error) +{ + __be32 *p; + + p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 2*4); + p = xdr_encode_opaque_fixed(p, error->dev_id.data, + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(error->status); + *p = cpu_to_be32(error->opnum); +} + +static void encode_layouterror(struct xdr_stream *xdr, + const struct nfs42_layout_error *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTERROR, decode_layouterror_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, args->offset); + p = xdr_encode_hyper(p, args->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(1); + encode_device_error(xdr, &args->errors[0]); +} + /* * Encode ALLOCATE request */ @@ -381,6 +438,27 @@ static void nfs4_xdr_enc_clone(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTERROR request + */ +static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_layouterror_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + int i; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + for (i = 0; i < args->num_errors; i++) + encode_layouterror(xdr, &args->errors[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -394,7 +472,7 @@ static int decode_write_response(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; count = be32_to_cpup(p); if (count > 1) return -EREMOTEIO; @@ -402,18 +480,14 @@ static int decode_write_response(struct xdr_stream *xdr, status = decode_opaque_fixed(xdr, &res->stateid, NFS4_STATEID_SIZE); if (unlikely(status)) - goto out_overflow; + return -EIO; } p = xdr_inline_decode(xdr, 8 + 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; p = xdr_decode_hyper(p, &res->count); res->verifier.committed = be32_to_cpup(p); return decode_verifier(xdr, &res->verifier.verifier); - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_copy_requirements(struct xdr_stream *xdr, @@ -422,14 +496,11 @@ static int decode_copy_requirements(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4 + 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->consecutive = be32_to_cpup(p++); res->synchronous = be32_to_cpup(p++); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res) @@ -474,15 +545,11 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) p = xdr_inline_decode(xdr, 4 + 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->sr_eof = be32_to_cpup(p++); p = xdr_decode_hyper(p, &res->sr_offset); return 0; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_layoutstats(struct xdr_stream *xdr) @@ -495,6 +562,11 @@ static int decode_clone(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_CLONE); } +static int decode_layouterror(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_LAYOUTERROR); +} + /* * Decode ALLOCATE request */ @@ -704,4 +776,30 @@ out: return status; } +/* + * Decode LAYOUTERROR request + */ +static int nfs4_xdr_dec_layouterror(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs42_layouterror_res *res = data; + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + + for (i = 0; i < res->num_errors && status == 0; i++) + status = decode_layouterror(xdr); +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c @@ -42,7 +42,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) } #ifdef CONFIG_NFS_V4_1 -/** +/* * Per auth flavor data server rpc clients */ struct nfs4_ds_server { @@ -51,7 +51,9 @@ struct nfs4_ds_server { }; /** - * Common lookup case for DS I/O + * nfs4_find_ds_client - Common lookup case for DS I/O + * @ds_clp: pointer to the DS's nfs_client + * @flavor: rpc auth flavour to match */ static struct nfs4_ds_server * nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor) @@ -118,9 +120,13 @@ nfs4_free_ds_server(struct nfs4_ds_server *dss) } /** -* Find or create a DS rpc client with th MDS server rpc client auth flavor -* in the nfs_client cl_ds_clients list. -*/ + * nfs4_find_or_create_ds_client - Find or create a DS rpc client + * @ds_clp: pointer to the DS's nfs_client + * @inode: pointer to the inode + * + * Find or create a DS rpc client with th MDS server rpc client auth flavor + * in the nfs_client cl_ds_clients list. + */ struct rpc_clnt * nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode) { @@ -145,7 +151,6 @@ static void nfs4_shutdown_ds_clients(struct nfs_client *clp) { struct nfs4_ds_server *dss; - LIST_HEAD(shutdown_list); while (!list_empty(&clp->cl_ds_clients)) { dss = list_entry(clp->cl_ds_clients.next, @@ -284,7 +289,7 @@ static int nfs4_init_callback(struct nfs_client *clp) /** * nfs40_init_client - nfs_client initialization tasks for NFSv4.0 - * @clp - nfs_client to initialize + * @clp: nfs_client to initialize * * Returns zero on success, or a negative errno if some error occurred. */ @@ -312,7 +317,7 @@ int nfs40_init_client(struct nfs_client *clp) /** * nfs41_init_client - nfs_client initialization tasks for NFSv4.1+ - * @clp - nfs_client to initialize + * @clp: nfs_client to initialize * * Returns zero on success, or a negative errno if some error occurred. */ @@ -360,9 +365,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) * nfs4_init_client - Initialise an NFS4 client record * * @clp: nfs_client to initialise - * @timeparms: timeout parameters for underlying RPC transport - * @ip_addr: callback IP address in presentation format - * @authflavor: authentication flavor for underlying RPC transport + * @cl_init: pointer to nfs_client_initdata * * Returns pointer to an NFS client, or an ERR_PTR value. */ @@ -649,13 +652,13 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1, /** * nfs4_detect_session_trunking - Checks for session trunking. - * - * Called after a successful EXCHANGE_ID on a multi-addr connection. - * Upon success, add the transport. - * * @clp: original mount nfs_client * @res: result structure from an exchange_id using the original mount * nfs_client with a new multi_addr transport + * @xprt: pointer to the transport to add. + * + * Called after a successful EXCHANGE_ID on a multi-addr connection. + * Upon success, add the transport. * * Returns zero on success, otherwise -EINVAL * diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c @@ -137,6 +137,7 @@ static size_t nfs_parse_server_name(char *string, size_t len, /** * nfs_find_best_sec - Find a security mechanism supported locally + * @clnt: pointer to rpc_clnt * @server: NFS server struct * @flavors: List of security tuples returned by SECINFO procedure * @@ -288,8 +289,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, /** * nfs_follow_referral - set up mountpoint when hitting a referral on moved error - * @dentry - parent directory - * @locations - array of NFSv4 server location information + * @dentry: parent directory + * @locations: array of NFSv4 server location information * */ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c @@ -730,33 +730,41 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) res->sr_slot = NULL; } +static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, + u32 seqnr) +{ + if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) + slot->seq_nr_highest_sent = seqnr; +} +static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, + u32 seqnr) +{ + slot->seq_nr_highest_sent = seqnr; + slot->seq_nr_last_acked = seqnr; +} + static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { struct nfs4_session *session; struct nfs4_slot *slot = res->sr_slot; struct nfs_client *clp; - bool interrupted = false; int ret = 1; if (slot == NULL) goto out_noaction; /* don't increment the sequence number if the task wasn't sent */ - if (!RPC_WAS_SENT(task)) + if (!RPC_WAS_SENT(task) || slot->seq_done) goto out; session = slot->table->session; - if (slot->interrupted) { - if (res->sr_status != -NFS4ERR_DELAY) - slot->interrupted = 0; - interrupted = true; - } - trace_nfs4_sequence_done(session, res); /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: + /* Mark this sequence number as having been acked */ + nfs4_slot_sequence_acked(slot, slot->seq_nr); /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; @@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task, * sr_status remains 1 if an RPC level error occurred. * The server may or may not have processed the sequence * operation.. - * Mark the slot as having hosted an interrupted RPC call. */ - slot->interrupted = 1; + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); + slot->seq_done = 1; goto out; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and @@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task, __func__, slot->slot_nr, slot->seq_nr); + nfs4_slot_sequence_acked(slot, slot->seq_nr); goto out_retry; case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_SEQ_FALSE_RETRY: @@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task, * The server thinks we tried to replay a request. * Retry the call after bumping the sequence ID. */ + nfs4_slot_sequence_acked(slot, slot->seq_nr); goto retry_new_seq; case -NFS4ERR_BADSLOT: /* @@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task, goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); /* - * Was the last operation on this sequence interrupted? - * If so, retry after bumping the sequence number. - */ - if (interrupted) - goto retry_new_seq; - /* - * Could this slot have been previously retired? - * If so, then the server may be expecting seq_nr = 1! + * Were one or more calls using this slot interrupted? + * If the server never received the request, then our + * transmitted slot sequence number may be too high. */ - if (slot->seq_nr != 1) { - slot->seq_nr = 1; + if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { + slot->seq_nr--; goto retry_nowait; } - goto session_recover; + /* + * RFC5661: + * A retry might be sent while the original request is + * still in progress on the replier. The replier SHOULD + * deal with the issue by returning NFS4ERR_DELAY as the + * reply to SEQUENCE or CB_SEQUENCE operation, but + * implementations MAY return NFS4ERR_SEQ_MISORDERED. + * + * Restart the search after a delay. + */ + slot->seq_nr = slot->seq_nr_highest_sent; + goto out_retry; default: /* Just update the slot sequence no. */ slot->seq_done = 1; @@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; -static void -nfs4_sequence_process_interrupted(struct nfs_client *client, - struct nfs4_slot *slot, const struct cred *cred) -{ - struct rpc_task *task; - - task = _nfs41_proc_sequence(client, cred, slot, true); - if (!IS_ERR(task)) - rpc_put_task_async(task); -} - #else /* !CONFIG_NFS_V4_1 */ static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -937,16 +943,15 @@ int nfs4_sequence_done(struct rpc_task *task, } EXPORT_SYMBOL_GPL(nfs4_sequence_done); -static void -nfs4_sequence_process_interrupted(struct nfs_client *client, - struct nfs4_slot *slot, const struct cred *cred) +#endif /* !CONFIG_NFS_V4_1 */ + +static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) { - WARN_ON_ONCE(1); - slot->interrupted = 0; + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; } -#endif /* !CONFIG_NFS_V4_1 */ - static void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -958,10 +963,6 @@ void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, args->sa_slot = slot; res->sr_slot = slot; - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - res->sr_status = 1; - } int nfs4_setup_sequence(struct nfs_client *client, @@ -982,31 +983,25 @@ int nfs4_setup_sequence(struct nfs_client *client, task->tk_timeout = 0; } - for (;;) { - spin_lock(&tbl->slot_tbl_lock); - /* The state manager will wait until the slot table is empty */ - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; - - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* Try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - goto out_sleep; - } - spin_unlock(&tbl->slot_tbl_lock); + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; - if (likely(!slot->interrupted)) - break; - nfs4_sequence_process_interrupted(client, - slot, task->tk_msg.rpc_cred); + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; } + spin_unlock(&tbl->slot_tbl_lock); nfs4_sequence_attach_slot(args, res, slot); trace_nfs4_setup_sequence(session, args); out_start: + nfs41_sequence_res_init(res); rpc_call_start(task); return 0; @@ -1555,6 +1550,10 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, static void nfs_set_open_stateid_locked(struct nfs4_state *state, const nfs4_stateid *stateid, nfs4_stateid *freeme) + __must_hold(&state->owner->so_lock) + __must_hold(&state->seqlock) + __must_hold(RCU) + { DEFINE_WAIT(wait); int status = 0; @@ -5963,7 +5962,7 @@ out: /** * nfs4_proc_setclientid_confirm - Confirm client ID * @clp: state data structure - * @res: result of a previous SETCLIENTID + * @arg: result of a previous SETCLIENTID * @cred: credential to use for this call * * Returns zero, a negative errno, or a negative NFS4ERR status code. @@ -7527,7 +7526,7 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred) return status; } -/** +/* * If 'use_integrity' is true and the state managment nfs_client * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient * and the machine credential as per RFC3530bis and RFC5661 Security @@ -8937,10 +8936,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) if (status != 0) goto out; - /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ - if (task->tk_status < 0 || lgp->res.layoutp->len == 0) { + if (task->tk_status < 0) { status = nfs4_layoutget_handle_exception(task, lgp, &exception); *timeout = exception.timeout; + } else if (lgp->res.layoutp->len == 0) { + status = -EAGAIN; + *timeout = nfs4_update_delay(&exception.timeout); } else lseg = pnfs_layout_process(lgp); out: @@ -9219,7 +9220,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) return status; } -/** +/* * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if * possible) as per RFC3530bis and RFC5661 Security Considerations sections */ @@ -9484,7 +9485,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { * @server: server / transport on which to perform the operation * @stateid: state ID to release * @cred: credential - * @is_recovery: set to true if this call needs to be privileged + * @privileged: set to true if this call needs to be privileged * * Note: this function is always asynchronous. */ @@ -9691,7 +9692,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK | NFS_CAP_LAYOUTSTATS - | NFS_CAP_CLONE, + | NFS_CAP_CLONE + | NFS_CAP_LAYOUTERROR, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c @@ -55,7 +55,7 @@ static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) /** * nfs4_slot_tbl_drain_complete - wake waiters when drain is complete - * @tbl - controlling slot table + * @tbl: controlling slot table * */ void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl) @@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, slot->table = tbl; slot->slot_nr = slotid; slot->seq_nr = seq_init; + slot->seq_nr_highest_sent = seq_init; + slot->seq_nr_last_acked = seq_init - 1; } return slot; } @@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, p = &tbl->slots; while (*p) { (*p)->seq_nr = ivalue; - (*p)->interrupted = 0; + (*p)->seq_nr_highest_sent = ivalue; + (*p)->seq_nr_last_acked = ivalue - 1; p = &(*p)->next; } tbl->highest_used_slotid = NFS4_NO_SLOT; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h @@ -10,7 +10,7 @@ /* maximum number of slots to use */ #define NFS4_DEF_SLOT_TABLE_SIZE (64U) -#define NFS4_DEF_CB_SLOT_TABLE_SIZE (1U) +#define NFS4_DEF_CB_SLOT_TABLE_SIZE (16U) #define NFS4_MAX_SLOT_TABLE (1024U) #define NFS4_NO_SLOT ((u32)-1) @@ -23,8 +23,9 @@ struct nfs4_slot { unsigned long generation; u32 slot_nr; u32 seq_nr; - unsigned int interrupted : 1, - privileged : 1, + u32 seq_nr_last_acked; + u32 seq_nr_highest_sent; + unsigned int privileged : 1, seq_done : 1; }; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c @@ -563,6 +563,7 @@ static void nfs4_gc_state_owners(struct nfs_server *server) * nfs4_get_state_owner - Look up a state owner given a credential * @server: nfs_server to search * @cred: RPC credential to match + * @gfp_flags: allocation mode * * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. */ diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h @@ -524,6 +524,31 @@ TRACE_EVENT(nfs4_setup_sequence, ) ); +TRACE_EVENT(nfs4_xdr_status, + TP_PROTO( + u32 op, + int error + ), + + TP_ARGS(op, error), + + TP_STRUCT__entry( + __field(u32, op) + __field(int, error) + ), + + TP_fast_assign( + __entry->op = op; + __entry->error = -error; + ), + + TP_printk( + "operation %d: nfs status %d (%s)", + __entry->op, + __entry->error, show_nfsv4_errors(__entry->error) + ) +); + DECLARE_EVENT_CLASS(nfs4_open_event, TP_PROTO( const struct nfs_open_context *ctx, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c @@ -54,6 +54,7 @@ #include <linux/nfs_fs.h> #include "nfs4_fs.h" +#include "nfs4trace.h" #include "internal.h" #include "nfs4idmap.h" #include "nfs4session.h" @@ -214,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, nfs4_fattr_bitmap_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) -#define decode_read_maxsz (op_decode_hdr_maxsz + 2) +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1) #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ 2 + encode_verifier_maxsz + 5 + \ nfs4_label_maxsz) #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ - decode_verifier_maxsz) + decode_verifier_maxsz + 1) #define encode_readlink_maxsz (op_encode_hdr_maxsz) -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1) #define encode_write_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 4) #define decode_write_maxsz (op_decode_hdr_maxsz + \ @@ -283,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define encode_getacl_maxsz (encode_getattr_maxsz) #define decode_getacl_maxsz (op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz + 1) + nfs4_fattr_bitmap_maxsz + 1 + 1) #define encode_setacl_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_setacl_maxsz (decode_setattr_maxsz) #define encode_fs_locations_maxsz \ (encode_getattr_maxsz) #define decode_fs_locations_maxsz \ - (0) + (1) #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) @@ -391,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, 1 /* opaque devaddr4 length */ + \ /* devaddr4 payload is read into page */ \ 1 /* notification bitmap length */ + \ - 1 /* notification bitmap, word 0 */) + 1 /* notification bitmap, word 0 */ + \ + 1 /* possible XDR padding */) #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ encode_stateid_maxsz) #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ decode_stateid_maxsz + \ - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1) #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ 2 /* offset */ + \ 2 /* length */ + \ @@ -1015,12 +1017,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - struct rpc_auth *auth = req->rq_cred->cr_auth; /* initialize running count of expected bytes in reply. * NOTE: the replied tag SHOULD be the same is the one sent, * but this is not required as a MUST for the server to do so. */ - hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; + hdr->replen = 3 + hdr->taglen; WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN); encode_string(xdr, hdr->taglen, hdr->tag); @@ -2340,9 +2341,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); if (args->lg_args) { encode_layoutget(xdr, args->lg_args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->lg_args->layout.pages, - 0, args->lg_args->layout.pglen); + rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, + args->lg_args->layout.pglen, + hdr.replen); } encode_nops(&hdr); } @@ -2386,9 +2387,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); if (args->lg_args) { encode_layoutget(xdr, args->lg_args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->lg_args->layout.pages, - 0, args->lg_args->layout.pglen); + rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, + args->lg_args->layout.pglen, + hdr.replen); } encode_nops(&hdr); } @@ -2498,8 +2499,8 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_readlink(xdr, args, req, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, - args->pgbase, args->pglen); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->pglen, hdr.replen); encode_nops(&hdr); } @@ -2519,11 +2520,8 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_readdir(xdr, args, req, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, - args->pgbase, args->count); - dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", - __func__, hdr.replen << 2, args->pages, - args->pgbase, args->count); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); encode_nops(&hdr); } @@ -2543,8 +2541,8 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_read(xdr, args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->pages, args->pgbase, args->count); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); } @@ -2590,9 +2588,8 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr(xdr, nfs4_acl_bitmap, NULL, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, 0, args->acl_len); - + rpc_prepare_reply_pages(req, args->acl_pages, 0, + args->acl_len, replen); encode_nops(&hdr); } @@ -2813,9 +2810,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, encode_fs_locations(xdr, args->bitmask, &hdr); } - /* Set up reply kvec to capture returned fs_locations array. */ - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - (struct page **)&args->page, 0, PAGE_SIZE); + rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, + PAGE_SIZE, replen); encode_nops(&hdr); } @@ -3017,10 +3013,8 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, /* set up reply kvec. Subtract notification bitmap max size (2) * so that notification bitmap is put in xdr_buf tail */ - xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, - args->pdev->pages, args->pdev->pgbase, - args->pdev->pglen); - + rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase, + args->pdev->pglen, hdr.replen - 2); encode_nops(&hdr); } @@ -3041,9 +3035,8 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, encode_putfh(xdr, NFS_FH(args->inode), &hdr); encode_layoutget(xdr, args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->layout.pages, 0, args->layout.pglen); - + rpc_prepare_reply_pages(req, args->layout.pages, 0, + args->layout.pglen, hdr.replen); encode_nops(&hdr); } @@ -3144,22 +3137,12 @@ static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, } #endif /* CONFIG_NFS_V4_1 */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("nfs: %s: prematurely hit end of receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} - static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { ssize_t ret = xdr_stream_decode_opaque_inline(xdr, (void **)string, NFS4_OPAQUE_LIMIT); - if (unlikely(ret < 0)) { - if (ret == -EBADMSG) - print_overflow_msg(__func__, xdr); + if (unlikely(ret < 0)) return -EIO; - } *len = ret; return 0; } @@ -3170,22 +3153,19 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; hdr->status = be32_to_cpup(p++); hdr->taglen = be32_to_cpup(p); p = xdr_inline_decode(xdr, hdr->taglen + 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; hdr->tag = (char *)p; p += XDR_QUADLEN(hdr->taglen); hdr->nops = be32_to_cpup(p); if (unlikely(hdr->nops < 1)) return nfs4_stat_to_errno(hdr->status); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, @@ -3201,11 +3181,14 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, opnum = be32_to_cpup(p++); if (unlikely(opnum != expected)) goto out_bad_operation; + if (unlikely(*p != cpu_to_be32(NFS_OK))) + goto out_status; + *nfs_retval = 0; + return true; +out_status: nfserr = be32_to_cpup(p); - if (nfserr == NFS_OK) - *nfs_retval = 0; - else - *nfs_retval = nfs4_stat_to_errno(nfserr); + trace_nfs4_xdr_status(opnum, nfserr); + *nfs_retval = nfs4_stat_to_errno(nfserr); return true; out_bad_operation: dprintk("nfs: Server returned operation" @@ -3214,7 +3197,6 @@ out_bad_operation: *nfs_retval = -EREMOTEIO; return false; out_overflow: - print_overflow_msg(__func__, xdr); *nfs_retval = -EIO; return false; } @@ -3235,10 +3217,9 @@ static int decode_ace(struct xdr_stream *xdr, void *ace) char *str; p = xdr_inline_decode(xdr, 12); - if (likely(p)) - return decode_opaque_inline(xdr, &strlen, &str); - print_overflow_msg(__func__, xdr); - return -EIO; + if (unlikely(!p)) + return -EIO; + return decode_opaque_inline(xdr, &strlen, &str); } static ssize_t @@ -3249,10 +3230,9 @@ decode_bitmap4(struct xdr_stream *xdr, uint32_t *bitmap, size_t sz) ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz); if (likely(ret >= 0)) return ret; - if (ret == -EMSGSIZE) - return sz; - print_overflow_msg(__func__, xdr); - return -EIO; + if (ret != -EMSGSIZE) + return -EIO; + return sz; } static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) @@ -3268,13 +3248,10 @@ static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigne p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *attrlen = be32_to_cpup(p); *savep = xdr_stream_pos(xdr); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) @@ -3303,7 +3280,7 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *type = be32_to_cpup(p); if (*type < NF4REG || *type > NF4NAMEDATTR) { dprintk("%s: bad type %d\n", __func__, *type); @@ -3314,9 +3291,6 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * } dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_fh_expire_type(struct xdr_stream *xdr, @@ -3330,15 +3304,12 @@ static int decode_attr_fh_expire_type(struct xdr_stream *xdr, if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *type = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE; } dprintk("%s: expire type=0x%x\n", __func__, *type); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) @@ -3352,7 +3323,7 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, change); bitmap[0] &= ~FATTR4_WORD0_CHANGE; ret = NFS_ATTR_FATTR_CHANGE; @@ -3360,9 +3331,6 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t dprintk("%s: change attribute=%Lu\n", __func__, (unsigned long long)*change); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) @@ -3376,16 +3344,13 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t * if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, size); bitmap[0] &= ~FATTR4_WORD0_SIZE; ret = NFS_ATTR_FATTR_SIZE; } dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -3398,15 +3363,12 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; } dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -3419,15 +3381,12 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; } dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) @@ -3442,7 +3401,7 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { p = xdr_inline_decode(xdr, 16); if (unlikely(!p)) - goto out_overflow; + return -EIO; p = xdr_decode_hyper(p, &fsid->major); xdr_decode_hyper(p, &fsid->minor); bitmap[0] &= ~FATTR4_WORD0_FSID; @@ -3452,9 +3411,6 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs (unsigned long long)fsid->major, (unsigned long long)fsid->minor); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -3467,15 +3423,12 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; } dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res) @@ -3487,14 +3440,11 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t * if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; *res = -be32_to_cpup(p); } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_exclcreat_supported(struct xdr_stream *xdr, @@ -3526,13 +3476,13 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p); if (len > NFS4_FHSIZE) return -EIO; p = xdr_inline_decode(xdr, len); if (unlikely(!p)) - goto out_overflow; + return -EIO; if (fh != NULL) { memcpy(fh->data, p, len); fh->size = len; @@ -3540,9 +3490,6 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE; } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -3555,15 +3502,12 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; } dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) @@ -3577,16 +3521,13 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, fileid); bitmap[0] &= ~FATTR4_WORD0_FILEID; ret = NFS_ATTR_FATTR_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) @@ -3600,16 +3541,13 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, fileid); bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -3623,15 +3561,12 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; } dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -3645,15 +3580,12 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; } dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -3667,15 +3599,12 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; } dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) @@ -3686,7 +3615,7 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; n = be32_to_cpup(p); if (n == 0) goto root_path; @@ -3718,9 +3647,6 @@ out_eio: dprintk(" status %d", status); status = -EIO; goto out; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) @@ -3745,7 +3671,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st goto out; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + goto out_eio; n = be32_to_cpup(p); if (n <= 0) goto out_eio; @@ -3758,7 +3684,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st loc = &res->locations[res->nlocations]; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + goto out_eio; m = be32_to_cpup(p); dprintk("%s: servers:\n", __func__); @@ -3796,8 +3722,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st out: dprintk("%s: fs_locations done, error = %d\n", __func__, status); return status; -out_overflow: - print_overflow_msg(__func__, xdr); out_eio: status = -EIO; goto out; @@ -3814,15 +3738,12 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; } dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) @@ -3836,15 +3757,12 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *maxlink = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_MAXLINK; } dprintk("%s: maxlink=%u\n", __func__, *maxlink); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) @@ -3858,15 +3776,12 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *maxname = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_MAXNAME; } dprintk("%s: maxname=%u\n", __func__, *maxname); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -3881,7 +3796,7 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ uint64_t maxread; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, &maxread); if (maxread > 0x7FFFFFFF) maxread = 0x7FFFFFFF; @@ -3890,9 +3805,6 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ } dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -3907,7 +3819,7 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 uint64_t maxwrite; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, &maxwrite); if (maxwrite > 0x7FFFFFFF) maxwrite = 0x7FFFFFFF; @@ -3916,9 +3828,6 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 } dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) @@ -3933,7 +3842,7 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; tmp = be32_to_cpup(p); *mode = tmp & ~S_IFMT; bitmap[1] &= ~FATTR4_WORD1_MODE; @@ -3941,9 +3850,6 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m } dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) @@ -3957,16 +3863,13 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; *nlink = be32_to_cpup(p); bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; ret = NFS_ATTR_FATTR_NLINK; } dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static ssize_t decode_nfs4_string(struct xdr_stream *xdr, @@ -4011,10 +3914,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, return NFS_ATTR_FATTR_OWNER; } out: - if (len != -EBADMSG) - return 0; - print_overflow_msg(__func__, xdr); - return -EIO; + if (len == -EBADMSG) + return -EIO; + return 0; } static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, @@ -4046,10 +3948,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, return NFS_ATTR_FATTR_GROUP; } out: - if (len != -EBADMSG) - return 0; - print_overflow_msg(__func__, xdr); - return -EIO; + if (len == -EBADMSG) + return -EIO; + return 0; } static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) @@ -4066,7 +3967,7 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; major = be32_to_cpup(p++); minor = be32_to_cpup(p); tmp = MKDEV(major, minor); @@ -4077,9 +3978,6 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde } dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -4093,15 +3991,12 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; } dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -4115,15 +4010,12 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; } dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -4137,15 +4029,12 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; } dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) @@ -4159,7 +4048,7 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, used); bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; ret = NFS_ATTR_FATTR_SPACE_USED; @@ -4167,9 +4056,6 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint dprintk("%s: space used=%Lu\n", __func__, (unsigned long long)*used); return ret; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static __be32 * @@ -4189,12 +4075,9 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) p = xdr_inline_decode(xdr, nfstime4_maxsz << 2); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_nfstime4(p, time); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) @@ -4265,19 +4148,19 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; lfs = be32_to_cpup(p++); p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; pi = be32_to_cpup(p++); p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p++); p = xdr_inline_decode(xdr, len); if (unlikely(!p)) - goto out_overflow; + return -EIO; if (len < NFS4_MAXLABELLEN) { if (label) { memcpy(label->label, p, len); @@ -4295,10 +4178,6 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, (char *)label->label, label->len, label->pi, label->lfs); return status; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) @@ -4342,14 +4221,11 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c p = xdr_inline_decode(xdr, 20); if (unlikely(!p)) - goto out_overflow; + return -EIO; cinfo->atomic = be32_to_cpup(p++); p = xdr_decode_hyper(p, &cinfo->before); xdr_decode_hyper(p, &cinfo->after); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) @@ -4363,24 +4239,19 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) return status; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; supp = be32_to_cpup(p++); acc = be32_to_cpup(p); *supported = supp; *access = acc; return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) { ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); - if (unlikely(ret < 0)) { - print_overflow_msg(__func__, xdr); + if (unlikely(ret < 0)) return -EIO; - } return 0; } @@ -4460,13 +4331,11 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) return status; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; bmlen = be32_to_cpup(p); p = xdr_inline_decode(xdr, bmlen << 2); if (likely(p)) return 0; -out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -4574,13 +4443,10 @@ static int decode_threshold_hint(struct xdr_stream *xdr, if (likely(bitmap[0] & hint_bit)) { p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, res); } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_first_threshold_item4(struct xdr_stream *xdr, @@ -4593,10 +4459,8 @@ static int decode_first_threshold_item4(struct xdr_stream *xdr, /* layout type */ p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) { - print_overflow_msg(__func__, xdr); + if (unlikely(!p)) return -EIO; - } res->l_type = be32_to_cpup(p); /* thi_hintset bitmap */ @@ -4654,7 +4518,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, return -EREMOTEIO; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; num = be32_to_cpup(p); if (num == 0) return 0; @@ -4667,9 +4531,6 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD; } return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, @@ -4857,7 +4718,7 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; fsinfo->nlayouttypes = be32_to_cpup(p); /* pNFS is not supported by the underlying file system */ @@ -4867,7 +4728,7 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr, /* Decode and set first layout type, move xdr->p past unused types */ p = xdr_inline_decode(xdr, fsinfo->nlayouttypes * 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; /* If we get too many, then just cap it at the max */ if (fsinfo->nlayouttypes > NFS_MAX_LAYOUT_TYPES) { @@ -4879,9 +4740,6 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr, for(i = 0; i < fsinfo->nlayouttypes; ++i) fsinfo->layouttype[i] = be32_to_cpup(p++); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* @@ -4915,10 +4773,8 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, *res = 0; if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) { p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) { - print_overflow_msg(__func__, xdr); + if (unlikely(!p)) return -EIO; - } *res = be32_to_cpup(p); bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE; } @@ -4937,10 +4793,8 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, *res = 0; if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) { - print_overflow_msg(__func__, xdr); + if (unlikely(!p)) return -EIO; - } *res = be32_to_cpup(p); bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; } @@ -5016,19 +4870,16 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p); if (len > NFS4_FHSIZE) return -EIO; fh->size = len; p = xdr_inline_decode(xdr, len); if (unlikely(!p)) - goto out_overflow; + return -EIO; memcpy(fh->data, p, len); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) @@ -5052,7 +4903,7 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) p = xdr_inline_decode(xdr, 32); /* read 32 bytes */ if (unlikely(!p)) - goto out_overflow; + return -EIO; p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */ p = xdr_decode_hyper(p, &length); type = be32_to_cpup(p++); /* 4 byte read */ @@ -5069,11 +4920,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */ namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */ p = xdr_inline_decode(xdr, namelen); /* variable size field */ - if (likely(p)) - return -NFS4ERR_DENIED; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; + if (likely(!p)) + return -EIO; + return -NFS4ERR_DENIED; } static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) @@ -5142,7 +4991,7 @@ static int decode_space_limit(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) - goto out_overflow; + return -EIO; limit_type = be32_to_cpup(p++); switch (limit_type) { case NFS4_LIMIT_SIZE: @@ -5156,9 +5005,6 @@ static int decode_space_limit(struct xdr_stream *xdr, maxsize >>= PAGE_SHIFT; *pagemod_limit = min_t(u64, maxsize, ULONG_MAX); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_rw_delegation(struct xdr_stream *xdr, @@ -5173,7 +5019,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr, return status; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->do_recall = be32_to_cpup(p); switch (delegation_type) { @@ -5186,9 +5032,6 @@ static int decode_rw_delegation(struct xdr_stream *xdr, return -EIO; } return decode_ace(xdr, NULL); -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) @@ -5198,7 +5041,7 @@ static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; why_no_delegation = be32_to_cpup(p); switch (why_no_delegation) { case WND4_CONTENTION: @@ -5207,9 +5050,6 @@ static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) /* Ignore for now */ } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) @@ -5219,7 +5059,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; delegation_type = be32_to_cpup(p); res->delegation_type = 0; switch (delegation_type) { @@ -5232,9 +5072,6 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) return decode_no_delegation(xdr, res); } return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) @@ -5256,7 +5093,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->rflags = be32_to_cpup(p++); bmlen = be32_to_cpup(p); if (bmlen > 10) @@ -5264,7 +5101,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) p = xdr_inline_decode(xdr, bmlen << 2); if (unlikely(!p)) - goto out_overflow; + return -EIO; savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); for (i = 0; i < savewords; ++i) res->attrset[i] = be32_to_cpup(p++); @@ -5275,9 +5112,6 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) @@ -5326,7 +5160,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, return status; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; eof = be32_to_cpup(p++); count = be32_to_cpup(p); recvd = xdr_read_pages(xdr, count); @@ -5339,9 +5173,6 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, res->eof = eof; res->count = count; return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) @@ -5374,7 +5205,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) /* Convert length of symlink */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p); if (len >= rcvbuf->page_len || len <= 0) { dprintk("nfs: server returned giant symlink!\n"); @@ -5395,9 +5226,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) */ xdr_terminate_string(rcvbuf, len); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) @@ -5500,7 +5328,6 @@ static int decode_setattr(struct xdr_stream *xdr) return status; if (decode_bitmap4(xdr, NULL, 0) >= 0) return 0; - print_overflow_msg(__func__, xdr); return -EIO; } @@ -5512,7 +5339,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; opnum = be32_to_cpup(p++); if (opnum != OP_SETCLIENTID) { dprintk("nfs: decode_setclientid: Server returned operation" @@ -5523,7 +5350,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re if (nfserr == NFS_OK) { p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE); if (unlikely(!p)) - goto out_overflow; + return -EIO; p = xdr_decode_hyper(p, &res->clientid); memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE); } else if (nfserr == NFSERR_CLID_INUSE) { @@ -5532,28 +5359,25 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re /* skip netid string */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p); p = xdr_inline_decode(xdr, len); if (unlikely(!p)) - goto out_overflow; + return -EIO; /* skip uaddr string */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p); p = xdr_inline_decode(xdr, len); if (unlikely(!p)) - goto out_overflow; + return -EIO; return -NFSERR_CLID_INUSE; } else return nfs4_stat_to_errno(nfserr); return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_setclientid_confirm(struct xdr_stream *xdr) @@ -5572,13 +5396,10 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res) p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); return decode_write_verifier(xdr, &res->verf->verifier); -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_delegreturn(struct xdr_stream *xdr) @@ -5594,30 +5415,24 @@ static int decode_secinfo_gss(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; oid_len = be32_to_cpup(p); if (oid_len > GSS_OID_MAX_LEN) - goto out_err; + return -EINVAL; p = xdr_inline_decode(xdr, oid_len); if (unlikely(!p)) - goto out_overflow; + return -EIO; memcpy(flavor->flavor_info.oid.data, p, oid_len); flavor->flavor_info.oid.len = oid_len; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; flavor->flavor_info.qop = be32_to_cpup(p++); flavor->flavor_info.service = be32_to_cpup(p); return 0; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; -out_err: - return -EINVAL; } static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) @@ -5629,7 +5444,7 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->flavors->num_flavors = 0; num_flavors = be32_to_cpup(p); @@ -5641,7 +5456,7 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; sec_flavor->flavor = be32_to_cpup(p); if (sec_flavor->flavor == RPC_AUTH_GSS) { @@ -5655,9 +5470,6 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res status = 0; out: return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) @@ -5711,11 +5523,11 @@ static int decode_exchange_id(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; xdr_decode_hyper(p, &res->clientid); p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->seqid = be32_to_cpup(p++); res->flags = be32_to_cpup(p++); @@ -5739,7 +5551,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, /* server_owner4.so_minor_id */ p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; p = xdr_decode_hyper(p, &res->server_owner->minor_id); /* server_owner4.so_major_id */ @@ -5759,7 +5571,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, /* Implementation Id */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; impl_id_count = be32_to_cpup(p++); if (impl_id_count) { @@ -5778,16 +5590,13 @@ static int decode_exchange_id(struct xdr_stream *xdr, /* nii_date */ p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) - goto out_overflow; + return -EIO; p = xdr_decode_hyper(p, &res->impl_id->date.seconds); res->impl_id->date.nseconds = be32_to_cpup(p); /* if there's more than one entry, ignore the rest */ } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_chan_attrs(struct xdr_stream *xdr, @@ -5798,7 +5607,7 @@ static int decode_chan_attrs(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 28); if (unlikely(!p)) - goto out_overflow; + return -EIO; val = be32_to_cpup(p++); /* headerpadsz */ if (val) return -EINVAL; /* no support for header padding yet */ @@ -5816,12 +5625,9 @@ static int decode_chan_attrs(struct xdr_stream *xdr, if (nr_attrs == 1) { p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */ if (unlikely(!p)) - goto out_overflow; + return -EIO; } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) @@ -5844,7 +5650,7 @@ static int decode_bind_conn_to_session(struct xdr_stream *xdr, /* dir flags, rdma mode bool */ p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->dir = be32_to_cpup(p++); if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH) @@ -5855,9 +5661,6 @@ static int decode_bind_conn_to_session(struct xdr_stream *xdr, res->use_conn_in_rdma_mode = true; return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_create_session(struct xdr_stream *xdr, @@ -5875,7 +5678,7 @@ static int decode_create_session(struct xdr_stream *xdr, /* seqid, flags */ p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->seqid = be32_to_cpup(p++); res->flags = be32_to_cpup(p); @@ -5884,9 +5687,6 @@ static int decode_create_session(struct xdr_stream *xdr, if (!status) status = decode_chan_attrs(xdr, &res->bc_attrs); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) @@ -5967,7 +5767,6 @@ out_err: res->sr_status = status; return status; out_overflow: - print_overflow_msg(__func__, xdr); status = -EIO; goto out_err; #else /* CONFIG_NFS_V4_1 */ @@ -5995,7 +5794,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, if (status == -ETOOSMALL) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; pdev->mincount = be32_to_cpup(p); dprintk("%s: Min count too small. mincnt = %u\n", __func__, pdev->mincount); @@ -6005,7 +5804,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; type = be32_to_cpup(p++); if (type != pdev->layout_type) { dprintk("%s: layout mismatch req: %u pdev: %u\n", @@ -6019,19 +5818,19 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, */ pdev->mincount = be32_to_cpup(p); if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount) - goto out_overflow; + return -EIO; /* Parse notification bitmap, verifying that it is zero. */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; len = be32_to_cpup(p); if (len) { uint32_t i; p = xdr_inline_decode(xdr, 4 * len); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->notification = be32_to_cpup(p++); for (i = 1; i < len; i++) { @@ -6043,9 +5842,6 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, } } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, @@ -6115,7 +5911,6 @@ out: res->status = status; return status; out_overflow: - print_overflow_msg(__func__, xdr); status = -EIO; goto out; } @@ -6131,16 +5926,13 @@ static int decode_layoutreturn(struct xdr_stream *xdr, return status; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->lrs_present = be32_to_cpup(p); if (res->lrs_present) status = decode_layout_stateid(xdr, &res->stateid); else nfs4_stateid_copy(&res->stateid, &invalid_stateid); return status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_layoutcommit(struct xdr_stream *xdr, @@ -6158,19 +5950,16 @@ static int decode_layoutcommit(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; sizechanged = be32_to_cpup(p); if (sizechanged) { /* throw away new size */ p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) - goto out_overflow; + return -EIO; } return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_test_stateid(struct xdr_stream *xdr, @@ -6186,21 +5975,17 @@ static int decode_test_stateid(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; num_res = be32_to_cpup(p++); if (num_res != 1) - goto out; + return -EIO; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EIO; res->status = be32_to_cpup(p++); return status; -out_overflow: - print_overflow_msg(__func__, xdr); -out: - return -EIO; } static int decode_free_stateid(struct xdr_stream *xdr, @@ -7570,11 +7355,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, uint64_t new_cookie; __be32 *p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EAGAIN; if (*p == xdr_zero) { p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) - goto out_overflow; + return -EAGAIN; if (*p == xdr_zero) return -EAGAIN; entry->eof = 1; @@ -7583,13 +7368,13 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) - goto out_overflow; + return -EAGAIN; p = xdr_decode_hyper(p, &new_cookie); entry->len = be32_to_cpup(p); p = xdr_inline_decode(xdr, entry->len); if (unlikely(!p)) - goto out_overflow; + return -EAGAIN; entry->name = (const char *) p; /* @@ -7601,14 +7386,14 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->fattr->valid = 0; if (decode_attr_bitmap(xdr, bitmap) < 0) - goto out_overflow; + return -EAGAIN; if (decode_attr_length(xdr, &len, &savep) < 0) - goto out_overflow; + return -EAGAIN; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, NULL, entry->label, entry->server) < 0) - goto out_overflow; + return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) @@ -7622,10 +7407,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->cookie = new_cookie; return 0; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EAGAIN; } /* @@ -7791,6 +7572,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(COPY, enc_copy, dec_copy), PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel), PROC(LOOKUPP, enc_lookupp, dec_lookupp), + PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c @@ -11,3 +11,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h @@ -969,6 +969,91 @@ TRACE_EVENT(nfs_commit_done, ) ); +TRACE_DEFINE_ENUM(NFS_OK); +TRACE_DEFINE_ENUM(NFSERR_PERM); +TRACE_DEFINE_ENUM(NFSERR_NOENT); +TRACE_DEFINE_ENUM(NFSERR_IO); +TRACE_DEFINE_ENUM(NFSERR_NXIO); +TRACE_DEFINE_ENUM(NFSERR_ACCES); +TRACE_DEFINE_ENUM(NFSERR_EXIST); +TRACE_DEFINE_ENUM(NFSERR_XDEV); +TRACE_DEFINE_ENUM(NFSERR_NODEV); +TRACE_DEFINE_ENUM(NFSERR_NOTDIR); +TRACE_DEFINE_ENUM(NFSERR_ISDIR); +TRACE_DEFINE_ENUM(NFSERR_INVAL); +TRACE_DEFINE_ENUM(NFSERR_FBIG); +TRACE_DEFINE_ENUM(NFSERR_NOSPC); +TRACE_DEFINE_ENUM(NFSERR_ROFS); +TRACE_DEFINE_ENUM(NFSERR_MLINK); +TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); +TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); +TRACE_DEFINE_ENUM(NFSERR_DQUOT); +TRACE_DEFINE_ENUM(NFSERR_STALE); +TRACE_DEFINE_ENUM(NFSERR_REMOTE); +TRACE_DEFINE_ENUM(NFSERR_WFLUSH); +TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); +TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); +TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); +TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); +TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); +TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); +TRACE_DEFINE_ENUM(NFSERR_BADTYPE); +TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); + +#define nfs_show_status(x) \ + __print_symbolic(x, \ + { NFS_OK, "OK" }, \ + { NFSERR_PERM, "PERM" }, \ + { NFSERR_NOENT, "NOENT" }, \ + { NFSERR_IO, "IO" }, \ + { NFSERR_NXIO, "NXIO" }, \ + { NFSERR_ACCES, "ACCES" }, \ + { NFSERR_EXIST, "EXIST" }, \ + { NFSERR_XDEV, "XDEV" }, \ + { NFSERR_NODEV, "NODEV" }, \ + { NFSERR_NOTDIR, "NOTDIR" }, \ + { NFSERR_ISDIR, "ISDIR" }, \ + { NFSERR_INVAL, "INVAL" }, \ + { NFSERR_FBIG, "FBIG" }, \ + { NFSERR_NOSPC, "NOSPC" }, \ + { NFSERR_ROFS, "ROFS" }, \ + { NFSERR_MLINK, "MLINK" }, \ + { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ + { NFSERR_DQUOT, "DQUOT" }, \ + { NFSERR_STALE, "STALE" }, \ + { NFSERR_REMOTE, "REMOTE" }, \ + { NFSERR_WFLUSH, "WFLUSH" }, \ + { NFSERR_BADHANDLE, "BADHANDLE" }, \ + { NFSERR_NOT_SYNC, "NOTSYNC" }, \ + { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ + { NFSERR_NOTSUPP, "NOTSUPP" }, \ + { NFSERR_TOOSMALL, "TOOSMALL" }, \ + { NFSERR_SERVERFAULT, "REMOTEIO" }, \ + { NFSERR_BADTYPE, "BADTYPE" }, \ + { NFSERR_JUKEBOX, "JUKEBOX" }) + +TRACE_EVENT(nfs_xdr_status, + TP_PROTO( + int error + ), + + TP_ARGS(error), + + TP_STRUCT__entry( + __field(int, error) + ), + + TP_fast_assign( + __entry->error = error; + ), + + TP_printk( + "error=%d (%s)", + __entry->error, nfs_show_status(__entry->error) + ) +); + #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c @@ -350,7 +350,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, /** * nfs_unlock_request - Unlock request and wake up sleepers. - * @req: + * @req: pointer to request */ void nfs_unlock_request(struct nfs_page *req) { @@ -368,7 +368,7 @@ void nfs_unlock_request(struct nfs_page *req) /** * nfs_unlock_and_release_request - Unlock request and release the nfs_page - * @req: + * @req: pointer to request */ void nfs_unlock_and_release_request(struct nfs_page *req) { @@ -531,7 +531,6 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); * nfs_pgio_rpcsetup - Set up arguments for a pageio call * @hdr: The pageio hdr * @count: Number of bytes to read - * @offset: Initial offset * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ @@ -634,7 +633,6 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); /** * nfs_pgio_error - Clean up from a pageio error - * @desc: IO descriptor * @hdr: pageio header */ static void nfs_pgio_error(struct nfs_pgio_header *hdr) @@ -768,8 +766,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, pageused = 0; while (!list_empty(head)) { req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); + nfs_list_move_request(req, &hdr->pages); if (!last_page || last_page != req->wb_page) { pageused++; @@ -893,6 +890,7 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1, * nfs_can_coalesce_requests - test two requests for compatibility * @prev: pointer to nfs_page * @req: pointer to nfs_page + * @pgio: pointer to nfs_pagio_descriptor * * The nfs_page structures 'prev' and 'req' are compared to ensure that the * page data area they describe is contiguous, and that their RPC @@ -961,8 +959,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, } if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; - nfs_list_remove_request(req); - nfs_list_add_request(req, &mirror->pg_list); + nfs_list_move_request(req, &mirror->pg_list); mirror->pg_count += req->wb_bytes; return 1; } @@ -988,6 +985,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) } } +static void +nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + LIST_HEAD(head); + + nfs_list_move_request(req, &head); + desc->pg_completion_ops->error_cleanup(&head, desc->pg_error); +} + /** * nfs_pageio_add_request - Attempt to coalesce a request into a page list. * @desc: destination io descriptor @@ -1025,10 +1032,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_page_group_unlock(req); desc->pg_moreio = 1; nfs_pageio_doio(desc); - if (desc->pg_error < 0) - return 0; - if (mirror->pg_recoalesce) - return 0; + if (desc->pg_error < 0 || mirror->pg_recoalesce) + goto out_cleanup_subreq; /* retry add_request for this subreq */ nfs_page_group_lock(req); continue; @@ -1061,6 +1066,10 @@ err_ptr: desc->pg_error = PTR_ERR(subreq); nfs_page_group_unlock(req); return 0; +out_cleanup_subreq: + if (req != subreq) + nfs_pageio_cleanup_request(desc, subreq); + return 0; } static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) @@ -1079,7 +1088,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) struct nfs_page *req; req = list_first_entry(&head, struct nfs_page, wb_list); - nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; if (desc->pg_error < 0) { @@ -1120,7 +1128,8 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc) for (midx = 0; midx < desc->pg_mirror_count; midx++) { mirror = &desc->pg_mirrors[midx]; - desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list, + desc->pg_error); } } @@ -1168,11 +1177,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (nfs_pgio_has_mirroring(desc)) desc->pg_mirror_idx = midx; if (!nfs_pageio_add_request_mirror(desc, dupreq)) - goto out_failed; + goto out_cleanup_subreq; } return 1; +out_cleanup_subreq: + if (req != dupreq) + nfs_pageio_cleanup_request(desc, dupreq); out_failed: nfs_pageio_error_cleanup(desc); return 0; @@ -1194,7 +1206,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); - if (!mirror->pg_recoalesce) + if (desc->pg_error < 0 || !mirror->pg_recoalesce) break; if (!nfs_do_recoalesce(desc)) break; @@ -1222,9 +1234,8 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); - nfs_list_remove_request(req); if (!nfs_pageio_add_request(desc, req)) - nfs_list_add_request(req, &failed); + nfs_list_move_request(req, &failed); } nfs_pageio_complete(desc); if (!list_empty(&failed)) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c @@ -758,22 +758,35 @@ static int pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct nfs_server *server, struct list_head *layout_list) + __must_hold(&clp->cl_lock) + __must_hold(RCU) { struct pnfs_layout_hdr *lo, *next; struct inode *inode; list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { - if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) || + !list_empty(&lo->plh_bulk_destroy)) continue; + /* If the sb is being destroyed, just bail */ + if (!nfs_sb_active(server->super)) + break; inode = igrab(lo->plh_inode); - if (inode == NULL) - continue; - list_del_init(&lo->plh_layouts); - if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) - continue; - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); + if (inode != NULL) { + list_del_init(&lo->plh_layouts); + if (pnfs_layout_add_bulk_destroy_list(inode, + layout_list)) + continue; + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + iput(inode); + } else { + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); + } + nfs_sb_deactive(server->super); spin_lock(&clp->cl_lock); rcu_read_lock(); return -EAGAIN; @@ -811,7 +824,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); - iput(inode); + nfs_iput_and_deactive(inode); } return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h @@ -104,6 +104,7 @@ enum { NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ }; enum layoutdriver_policy_flags { @@ -349,6 +350,7 @@ void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nf void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, const struct nfs4_deviceid *); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); +void nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c @@ -284,10 +284,22 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); void +nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node) +{ + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + smp_mb__after_atomic(); + } +} +EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_available); + +void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node) { node->timestamp_unavailable = jiffies; + smp_mb__before_atomic(); set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + smp_mb__after_atomic(); } EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable); @@ -302,6 +314,7 @@ nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node) if (time_in_range(node->timestamp_unavailable, start, end)) return true; clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + smp_mb__after_atomic(); } return false; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c @@ -205,7 +205,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, } static void -nfs_async_read_error(struct list_head *head) +nfs_async_read_error(struct list_head *head, int error) { struct nfs_page *req; diff --git a/fs/nfs/super.c b/fs/nfs/super.c @@ -1919,7 +1919,7 @@ static int nfs_parse_devname(const char *dev_name, /* kill possible hostname list: not supported */ comma = strchr(dev_name, ','); if (comma != NULL && comma < end) - *comma = 0; + len = comma - dev_name; } if (len > maxnamlen) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c @@ -39,6 +39,7 @@ nfs_free_unlinkdata(struct nfs_unlinkdata *data) /** * nfs_async_unlink_done - Sillydelete post-processing * @task: rpc_task of the sillydelete + * @calldata: pointer to nfs_unlinkdata * * Do the directory attribute update. */ @@ -54,7 +55,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) /** * nfs_async_unlink_release - Release the sillydelete data. - * @task: rpc_task of the sillydelete + * @calldata: struct nfs_unlinkdata to release * * We need to call nfs_put_unlinkdata as a 'tk_release' task since the * rpc_task would be freed too. @@ -159,8 +160,8 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf /** * nfs_async_unlink - asynchronous unlinking of a file - * @dir: parent directory of dentry - * @dentry: dentry to unlink + * @dentry: parent directory of dentry + * @name: name of dentry to unlink */ static int nfs_async_unlink(struct dentry *dentry, const struct qstr *name) @@ -324,6 +325,7 @@ static const struct rpc_call_ops nfs_rename_ops = { * @new_dir: target directory for the rename * @old_dentry: original dentry to be renamed * @new_dentry: dentry to which the old_dentry should be renamed + * @complete: Function to run on successful completion * * It's expected that valid references to the dentries and inodes are held */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c @@ -26,6 +26,7 @@ #include <linux/iversion.h> #include <linux/uaccess.h> +#include <linux/sched/mm.h> #include "delegation.h" #include "internal.h" @@ -712,11 +713,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; struct nfs_pageio_descriptor pgio; - struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS); + struct nfs_io_completion *ioc; + unsigned int pflags = memalloc_nofs_save(); int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + ioc = nfs_io_completion_alloc(GFP_NOFS); if (ioc) nfs_io_completion_init(ioc, nfs_io_completion_commit, inode); @@ -727,6 +730,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_pageio_complete(&pgio); nfs_io_completion_put(ioc); + memalloc_nofs_restore(pflags); + if (err < 0) goto out_err; err = pgio.pg_error; @@ -865,7 +870,6 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page - * @dst: commit list head * @cinfo: holds list lock and accounting info * * This sets the PG_CLEAN bit, updates the cinfo count of @@ -1412,20 +1416,27 @@ static void nfs_redirty_request(struct nfs_page *req) nfs_release_request(req); } -static void nfs_async_write_error(struct list_head *head) +static void nfs_async_write_error(struct list_head *head, int error) { struct nfs_page *req; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); + if (nfs_error_is_fatal(error)) { + nfs_context_set_write_error(req->wb_context, error); + if (nfs_error_is_fatal_on_server(error)) { + nfs_write_error_remove_page(req); + continue; + } + } nfs_redirty_request(req); } } static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) { - nfs_async_write_error(&hdr->pages); + nfs_async_write_error(&hdr->pages, 0); filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset, hdr->args.offset + hdr->args.count - 1); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c @@ -60,16 +60,6 @@ struct nfs4_cb_compound_hdr { int status; }; -/* - * Handle decode buffer overflows out-of-line. - */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("NFS: %s prematurely hit the end of our receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} - static __be32 *xdr_encode_empty_array(__be32 *p) { *p++ = xdr_zero; @@ -240,7 +230,6 @@ static int decode_cb_op_status(struct xdr_stream *xdr, *status = nfs_cb_stat_to_errno(be32_to_cpup(p)); return 0; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; out_unexpected: dprintk("NFSD: Callback server returned operation %d but " @@ -309,7 +298,6 @@ static int decode_cb_compound4res(struct xdr_stream *xdr, hdr->nops = be32_to_cpup(p); return 0; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -437,7 +425,6 @@ out: cb->cb_seq_status = status; return status; out_overflow: - print_overflow_msg(__func__, xdr); status = -EIO; goto out; } diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h @@ -538,6 +538,7 @@ enum { NFSPROC4_CLNT_OFFLOAD_CANCEL, NFSPROC4_CLNT_LOOKUPP, + NFSPROC4_CLNT_LAYOUTERROR, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h @@ -261,5 +261,6 @@ struct nfs_server { #define NFS_CAP_CLONE (1U << 23) #define NFS_CAP_COPY (1U << 24) #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) +#define NFS_CAP_LAYOUTERROR (1U << 26) #endif diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h @@ -164,6 +164,16 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) list_add_tail(&req->wb_list, head); } +/** + * nfs_list_move_request - Move a request to a new list + * @req: request + * @head: head of list into which to insert the request. + */ +static inline void +nfs_list_move_request(struct nfs_page *req, struct list_head *head) +{ + list_move_tail(&req->wb_list, head); +} /** * nfs_list_remove_request - Remove a request from its wb_list diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h @@ -383,6 +383,41 @@ struct nfs42_layoutstat_data { struct nfs42_layoutstat_res res; }; +struct nfs42_device_error { + struct nfs4_deviceid dev_id; + int status; + enum nfs_opnum4 opnum; +}; + +struct nfs42_layout_error { + __u64 offset; + __u64 length; + nfs4_stateid stateid; + struct nfs42_device_error errors[1]; +}; + +#define NFS42_LAYOUTERROR_MAX 5 + +struct nfs42_layouterror_args { + struct nfs4_sequence_args seq_args; + struct inode *inode; + unsigned int num_errors; + struct nfs42_layout_error errors[NFS42_LAYOUTERROR_MAX]; +}; + +struct nfs42_layouterror_res { + struct nfs4_sequence_res seq_res; + unsigned int num_errors; + int rpc_status; +}; + +struct nfs42_layouterror_data { + struct nfs42_layouterror_args args; + struct nfs42_layouterror_res res; + struct inode *inode; + struct pnfs_layout_segment *lseg; +}; + struct nfs42_clone_args { struct nfs4_sequence_args seq_args; struct nfs_fh *src_fh; @@ -1549,7 +1584,7 @@ struct nfs_commit_data { }; struct nfs_pgio_completion_ops { - void (*error_cleanup)(struct list_head *head); + void (*error_cleanup)(struct list_head *head, int); void (*init_hdr)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header *hdr); void (*reschedule_io)(struct nfs_pgio_header *hdr); diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h @@ -74,14 +74,12 @@ struct rpc_cred_cache; struct rpc_authops; struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ - /* guess at number of u32's auth adds before - * reply data; normally the verifier size: */ - unsigned int au_rslack; - /* for gss, used to calculate au_rslack: */ - unsigned int au_verfsize; - - unsigned int au_flags; /* various flags */ - const struct rpc_authops *au_ops; /* operations */ + unsigned int au_rslack; /* reply cred size estimate */ + unsigned int au_verfsize; /* size of reply verifier */ + unsigned int au_ralign; /* words before UL header */ + + unsigned int au_flags; + const struct rpc_authops *au_ops; rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss @@ -131,13 +129,15 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); - __be32 * (*crmarshal)(struct rpc_task *, __be32 *); + int (*crmarshal)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crrefresh)(struct rpc_task *); - __be32 * (*crvalidate)(struct rpc_task *, __be32 *); - int (*crwrap_req)(struct rpc_task *, kxdreproc_t, - void *, __be32 *, void *); - int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t, - void *, __be32 *, void *); + int (*crvalidate)(struct rpc_task *task, + struct xdr_stream *xdr); + int (*crwrap_req)(struct rpc_task *task, + struct xdr_stream *xdr); + int (*crunwrap_resp)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crkey_timeout)(struct rpc_cred *); char * (*crstringify_acceptor)(struct rpc_cred *); bool (*crneed_reencode)(struct rpc_task *); @@ -165,10 +165,18 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred * void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); void put_rpccred(struct rpc_cred *); -__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); -__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); -int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); -int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +int rpcauth_marshcred(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_checkverf(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_wrap_req_encode(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_wrap_req(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_unwrap_resp_decode(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_unwrap_resp(struct rpc_task *task, + struct xdr_stream *xdr); bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h @@ -169,6 +169,9 @@ int rpcb_v4_register(struct net *net, const u32 program, const char *netid); void rpcb_getport_async(struct rpc_task *); +void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int hdrsize); void rpc_call_start(struct rpc_task *); int rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -1,4 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Dumb way to share this static piece of information with nfsd + * Define the string that exports the set of kernel-supported + * Kerberos enctypes. This list is sent via upcall to gssd, and + * is also exposed via the nfsd /proc API. The consumers generally + * treat this as an ordered list, where the first item in the list + * is the most preferred. + */ + +#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H +#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H + +#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES + +/* + * NB: This list includes encryption types that were deprecated + * by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC). + * + * ENCTYPE_AES256_CTS_HMAC_SHA1_96 + * ENCTYPE_AES128_CTS_HMAC_SHA1_96 + * ENCTYPE_DES3_CBC_SHA1 + * ENCTYPE_ARCFOUR_HMAC + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23" + +#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ + +/* + * NB: This list includes encryption types that were deprecated + * by RFC 8429 and RFC 6649. + * + * ENCTYPE_AES256_CTS_HMAC_SHA1_96 + * ENCTYPE_AES128_CTS_HMAC_SHA1_96 + * ENCTYPE_DES3_CBC_SHA1 + * ENCTYPE_ARCFOUR_HMAC + * ENCTYPE_DES_CBC_MD5 + * ENCTYPE_DES_CBC_CRC + * ENCTYPE_DES_CBC_MD4 */ #define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" + +#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ + +#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h @@ -97,6 +97,7 @@ typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { void (*rpc_call_prepare)(struct rpc_task *, void *); + void (*rpc_call_prepare_transmit)(struct rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); void (*rpc_count_stats)(struct rpc_task *, void *); void (*rpc_release)(void *); @@ -303,4 +304,12 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) } #endif /* CONFIG_SUNRPC_SWAP */ +static inline bool +rpc_task_need_resched(const struct rpc_task *task) +{ + if (RPC_IS_QUEUED(task) || task->tk_callback) + return true; + return false; +} + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h @@ -87,6 +87,16 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define xdr_one cpu_to_be32(1) #define xdr_two cpu_to_be32(2) +#define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL) +#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX) +#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT) +#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) + +#define rpc_call cpu_to_be32(RPC_CALL) +#define rpc_reply cpu_to_be32(RPC_REPLY) + +#define rpc_msg_accepted cpu_to_be32(RPC_MSG_ACCEPTED) + #define rpc_success cpu_to_be32(RPC_SUCCESS) #define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) #define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) @@ -95,6 +105,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) #define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) +#define rpc_mismatch cpu_to_be32(RPC_MISMATCH) +#define rpc_auth_error cpu_to_be32(RPC_AUTH_ERROR) + #define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) #define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) #define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) @@ -103,7 +116,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) #define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) #define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) -#define rpc_autherr_oldseqnum cpu_to_be32(101) /* * Miscellaneous XDR helper functions @@ -167,7 +179,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); -extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); @@ -217,6 +228,8 @@ struct xdr_stream { struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ unsigned int nwords; /* Remaining decode buffer length */ + + struct rpc_rqst *rqst; /* For debugging */ }; /* @@ -227,7 +240,8 @@ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *obj); -extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); @@ -235,7 +249,8 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); -extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h @@ -196,8 +196,6 @@ struct rpc_xprt { size_t max_payload; /* largest RPC payload size, in bytes */ - unsigned int tsh_size; /* size of transport specific - header */ struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ @@ -362,11 +360,6 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int max_req); void xprt_free(struct rpc_xprt *); -static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) -{ - return p + xprt->tsh_size; -} - static inline int xprt_enable_swap(struct rpc_xprt *xprt) { diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h @@ -26,6 +26,7 @@ struct sock_xprt { */ struct socket * sock; struct sock * inet; + struct file * file; /* * State of TCP reply receive diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h @@ -0,0 +1,361 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018 Oracle. All rights reserved. + * + * Trace point definitions for the "rpcgss" subsystem. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rpcgss + +#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RPCGSS_H + +#include <linux/tracepoint.h> + +/** + ** GSS-API related trace events + **/ + +TRACE_DEFINE_ENUM(GSS_S_BAD_MECH); +TRACE_DEFINE_ENUM(GSS_S_BAD_NAME); +TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE); +TRACE_DEFINE_ENUM(GSS_S_BAD_BINDINGS); +TRACE_DEFINE_ENUM(GSS_S_BAD_STATUS); +TRACE_DEFINE_ENUM(GSS_S_BAD_SIG); +TRACE_DEFINE_ENUM(GSS_S_NO_CRED); +TRACE_DEFINE_ENUM(GSS_S_NO_CONTEXT); +TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_CREDENTIAL); +TRACE_DEFINE_ENUM(GSS_S_CREDENTIALS_EXPIRED); +TRACE_DEFINE_ENUM(GSS_S_CONTEXT_EXPIRED); +TRACE_DEFINE_ENUM(GSS_S_FAILURE); +TRACE_DEFINE_ENUM(GSS_S_BAD_QOP); +TRACE_DEFINE_ENUM(GSS_S_UNAUTHORIZED); +TRACE_DEFINE_ENUM(GSS_S_UNAVAILABLE); +TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_ELEMENT); +TRACE_DEFINE_ENUM(GSS_S_NAME_NOT_MN); +TRACE_DEFINE_ENUM(GSS_S_CONTINUE_NEEDED); +TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_OLD_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN); + +#define show_gss_status(x) \ + __print_flags(x, "|", \ + { GSS_S_BAD_MECH, "GSS_S_BAD_MECH" }, \ + { GSS_S_BAD_NAME, "GSS_S_BAD_NAME" }, \ + { GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" }, \ + { GSS_S_BAD_BINDINGS, "GSS_S_BAD_BINDINGS" }, \ + { GSS_S_BAD_STATUS, "GSS_S_BAD_STATUS" }, \ + { GSS_S_BAD_SIG, "GSS_S_BAD_SIG" }, \ + { GSS_S_NO_CRED, "GSS_S_NO_CRED" }, \ + { GSS_S_NO_CONTEXT, "GSS_S_NO_CONTEXT" }, \ + { GSS_S_DEFECTIVE_TOKEN, "GSS_S_DEFECTIVE_TOKEN" }, \ + { GSS_S_DEFECTIVE_CREDENTIAL, "GSS_S_DEFECTIVE_CREDENTIAL" }, \ + { GSS_S_CREDENTIALS_EXPIRED, "GSS_S_CREDENTIALS_EXPIRED" }, \ + { GSS_S_CONTEXT_EXPIRED, "GSS_S_CONTEXT_EXPIRED" }, \ + { GSS_S_FAILURE, "GSS_S_FAILURE" }, \ + { GSS_S_BAD_QOP, "GSS_S_BAD_QOP" }, \ + { GSS_S_UNAUTHORIZED, "GSS_S_UNAUTHORIZED" }, \ + { GSS_S_UNAVAILABLE, "GSS_S_UNAVAILABLE" }, \ + { GSS_S_DUPLICATE_ELEMENT, "GSS_S_DUPLICATE_ELEMENT" }, \ + { GSS_S_NAME_NOT_MN, "GSS_S_NAME_NOT_MN" }, \ + { GSS_S_CONTINUE_NEEDED, "GSS_S_CONTINUE_NEEDED" }, \ + { GSS_S_DUPLICATE_TOKEN, "GSS_S_DUPLICATE_TOKEN" }, \ + { GSS_S_OLD_TOKEN, "GSS_S_OLD_TOKEN" }, \ + { GSS_S_UNSEQ_TOKEN, "GSS_S_UNSEQ_TOKEN" }, \ + { GSS_S_GAP_TOKEN, "GSS_S_GAP_TOKEN" }) + + +DECLARE_EVENT_CLASS(rpcgss_gssapi_event, + TP_PROTO( + const struct rpc_task *task, + u32 maj_stat + ), + + TP_ARGS(task, maj_stat), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, maj_stat) + + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->maj_stat = maj_stat; + ), + + TP_printk("task:%u@%u maj_stat=%s", + __entry->task_id, __entry->client_id, + __entry->maj_stat == 0 ? + "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) +); + +#define DEFINE_GSSAPI_EVENT(name) \ + DEFINE_EVENT(rpcgss_gssapi_event, rpcgss_##name, \ + TP_PROTO( \ + const struct rpc_task *task, \ + u32 maj_stat \ + ), \ + TP_ARGS(task, maj_stat)) + +TRACE_EVENT(rpcgss_import_ctx, + TP_PROTO( + int status + ), + + TP_ARGS(status), + + TP_STRUCT__entry( + __field(int, status) + ), + + TP_fast_assign( + __entry->status = status; + ), + + TP_printk("status=%d", __entry->status) +); + +DEFINE_GSSAPI_EVENT(get_mic); +DEFINE_GSSAPI_EVENT(verify_mic); +DEFINE_GSSAPI_EVENT(wrap); +DEFINE_GSSAPI_EVENT(unwrap); + + +/** + ** GSS auth unwrap failures + **/ + +TRACE_EVENT(rpcgss_unwrap_failed, + TP_PROTO( + const struct rpc_task *task + ), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + ), + + TP_printk("task:%u@%u", __entry->task_id, __entry->client_id) +); + +TRACE_EVENT(rpcgss_bad_seqno, + TP_PROTO( + const struct rpc_task *task, + u32 expected, + u32 received + ), + + TP_ARGS(task, expected, received), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, expected) + __field(u32, received) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->expected = expected; + __entry->received = received; + ), + + TP_printk("task:%u@%u expected seqno %u, received seqno %u", + __entry->task_id, __entry->client_id, + __entry->expected, __entry->received) +); + +TRACE_EVENT(rpcgss_seqno, + TP_PROTO( + const struct rpc_task *task + ), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seqno) + ), + + TP_fast_assign( + const struct rpc_rqst *rqst = task->tk_rqstp; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->seqno = rqst->rq_seqno; + ), + + TP_printk("task:%u@%u xid=0x%08x seqno=%u", + __entry->task_id, __entry->client_id, + __entry->xid, __entry->seqno) +); + +TRACE_EVENT(rpcgss_need_reencode, + TP_PROTO( + const struct rpc_task *task, + u32 seq_xmit, + bool ret + ), + + TP_ARGS(task, seq_xmit, ret), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seq_xmit) + __field(u32, seqno) + __field(bool, ret) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->seq_xmit = seq_xmit; + __entry->seqno = task->tk_rqstp->rq_seqno; + __entry->ret = ret; + ), + + TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", + __entry->task_id, __entry->client_id, + __entry->xid, __entry->seqno, __entry->seq_xmit, + __entry->ret ? "" : "un") +); + +/** + ** gssd upcall related trace events + **/ + +TRACE_EVENT(rpcgss_upcall_msg, + TP_PROTO( + const char *buf + ), + + TP_ARGS(buf), + + TP_STRUCT__entry( + __string(msg, buf) + ), + + TP_fast_assign( + __assign_str(msg, buf) + ), + + TP_printk("msg='%s'", __get_str(msg)) +); + +TRACE_EVENT(rpcgss_upcall_result, + TP_PROTO( + u32 uid, + int result + ), + + TP_ARGS(uid, result), + + TP_STRUCT__entry( + __field(u32, uid) + __field(int, result) + + ), + + TP_fast_assign( + __entry->uid = uid; + __entry->result = result; + ), + + TP_printk("for uid %u, result=%d", __entry->uid, __entry->result) +); + +TRACE_EVENT(rpcgss_context, + TP_PROTO( + unsigned long expiry, + unsigned long now, + unsigned int timeout, + unsigned int len, + const u8 *data + ), + + TP_ARGS(expiry, now, timeout, len, data), + + TP_STRUCT__entry( + __field(unsigned long, expiry) + __field(unsigned long, now) + __field(unsigned int, timeout) + __field(int, len) + __string(acceptor, data) + ), + + TP_fast_assign( + __entry->expiry = expiry; + __entry->now = now; + __entry->timeout = timeout; + __entry->len = len; + strncpy(__get_str(acceptor), data, len); + ), + + TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s", + __entry->expiry, __entry->now, __entry->timeout, + __entry->len, __get_str(acceptor)) +); + + +/** + ** Miscellaneous events + */ + +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P); + +#define show_pseudoflavor(x) \ + __print_symbolic(x, \ + { RPC_AUTH_GSS_KRB5, "RPC_AUTH_GSS_KRB5" }, \ + { RPC_AUTH_GSS_KRB5I, "RPC_AUTH_GSS_KRB5I" }, \ + { RPC_AUTH_GSS_KRB5P, "RPC_AUTH_GSS_KRB5P" }) + + +TRACE_EVENT(rpcgss_createauth, + TP_PROTO( + unsigned int flavor, + int error + ), + + TP_ARGS(flavor, error), + + TP_STRUCT__entry( + __field(unsigned int, flavor) + __field(int, error) + + ), + + TP_fast_assign( + __entry->flavor = flavor; + __entry->error = error; + ), + + TP_printk("flavor=%s error=%d", + show_pseudoflavor(__entry->flavor), __entry->error) +); + + +#endif /* _TRACE_RPCGSS_H */ + +#include <trace/define_trace.h> diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h @@ -521,12 +521,18 @@ TRACE_EVENT(xprtrdma_post_send, TP_STRUCT__entry( __field(const void *, req) + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(int, num_sge) __field(int, signaled) __field(int, status) ), TP_fast_assign( + const struct rpc_rqst *rqst = &req->rl_slot; + + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; __entry->req = req; __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; __entry->signaled = req->rl_sendctx->sc_wr.send_flags & @@ -534,9 +540,11 @@ TRACE_EVENT(xprtrdma_post_send, __entry->status = status; ), - TP_printk("req=%p, %d SGEs%s, status=%d", + TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d", + __entry->task_id, __entry->client_id, __entry->req, __entry->num_sge, - (__entry->signaled ? ", signaled" : ""), + (__entry->num_sge == 1 ? "" : "s"), + (__entry->signaled ? "signaled " : ""), __entry->status ) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h @@ -77,6 +77,50 @@ TRACE_EVENT(rpc_request, ) ); +TRACE_DEFINE_ENUM(RPC_TASK_ASYNC); +TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER); +TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN); +TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS); +TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC); +TRACE_DEFINE_ENUM(RPC_TASK_KILLED); +TRACE_DEFINE_ENUM(RPC_TASK_SOFT); +TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN); +TRACE_DEFINE_ENUM(RPC_TASK_SENT); +TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT); +TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT); +TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT); + +#define rpc_show_task_flags(flags) \ + __print_flags(flags, "|", \ + { RPC_TASK_ASYNC, "ASYNC" }, \ + { RPC_TASK_SWAPPER, "SWAPPER" }, \ + { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ + { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ + { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ + { RPC_TASK_KILLED, "KILLED" }, \ + { RPC_TASK_SOFT, "SOFT" }, \ + { RPC_TASK_SOFTCONN, "SOFTCONN" }, \ + { RPC_TASK_SENT, "SENT" }, \ + { RPC_TASK_TIMEOUT, "TIMEOUT" }, \ + { RPC_TASK_NOCONNECT, "NOCONNECT" }, \ + { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" }) + +TRACE_DEFINE_ENUM(RPC_TASK_RUNNING); +TRACE_DEFINE_ENUM(RPC_TASK_QUEUED); +TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE); +TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT); +TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV); +TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT); + +#define rpc_show_runstate(flags) \ + __print_flags(flags, "|", \ + { (1UL << RPC_TASK_RUNNING), "RUNNING" }, \ + { (1UL << RPC_TASK_QUEUED), "QUEUED" }, \ + { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \ + { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \ + { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \ + { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }) + DECLARE_EVENT_CLASS(rpc_task_running, TP_PROTO(const struct rpc_task *task, const void *action), @@ -102,10 +146,10 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf", + TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf", __entry->task_id, __entry->client_id, - __entry->flags, - __entry->runstate, + rpc_show_task_flags(__entry->flags), + rpc_show_runstate(__entry->runstate), __entry->status, __entry->action ) @@ -149,10 +193,10 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __assign_str(q_name, rpc_qname(q)); ), - TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", + TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s", __entry->task_id, __entry->client_id, - __entry->flags, - __entry->runstate, + rpc_show_task_flags(__entry->flags), + rpc_show_runstate(__entry->runstate), __entry->status, __entry->timeout, __get_str(q_name) @@ -169,6 +213,87 @@ DECLARE_EVENT_CLASS(rpc_task_queued, DEFINE_RPC_QUEUED_EVENT(sleep); DEFINE_RPC_QUEUED_EVENT(wakeup); +DECLARE_EVENT_CLASS(rpc_failure, + + TP_PROTO(const struct rpc_task *task), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + ), + + TP_printk("task:%u@%u", + __entry->task_id, __entry->client_id) +); + +#define DEFINE_RPC_FAILURE(name) \ + DEFINE_EVENT(rpc_failure, rpc_bad_##name, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPC_FAILURE(callhdr); +DEFINE_RPC_FAILURE(verifier); + +DECLARE_EVENT_CLASS(rpc_reply_event, + + TP_PROTO( + const struct rpc_task *task + ), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __string(progname, task->tk_client->cl_program->name) + __field(u32, version) + __string(procname, rpc_proc_name(task)) + __string(servername, task->tk_xprt->servername) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __assign_str(progname, task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procname, rpc_proc_name(task)) + __assign_str(servername, task->tk_xprt->servername) + ), + + TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s", + __entry->task_id, __entry->client_id, __get_str(servername), + __entry->xid, __get_str(progname), __entry->version, + __get_str(procname)) +) + +#define DEFINE_RPC_REPLY_EVENT(name) \ + DEFINE_EVENT(rpc_reply_event, rpc__##name, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPC_REPLY_EVENT(prog_unavail); +DEFINE_RPC_REPLY_EVENT(prog_mismatch); +DEFINE_RPC_REPLY_EVENT(proc_unavail); +DEFINE_RPC_REPLY_EVENT(garbage_args); +DEFINE_RPC_REPLY_EVENT(unparsable); +DEFINE_RPC_REPLY_EVENT(mismatch); +DEFINE_RPC_REPLY_EVENT(stale_creds); +DEFINE_RPC_REPLY_EVENT(bad_creds); +DEFINE_RPC_REPLY_EVENT(auth_tooweak); + TRACE_EVENT(rpc_stats_latency, TP_PROTO( @@ -210,6 +335,169 @@ TRACE_EVENT(rpc_stats_latency, __entry->backlog, __entry->rtt, __entry->execute) ); +TRACE_EVENT(rpc_xdr_overflow, + TP_PROTO( + const struct xdr_stream *xdr, + size_t requested + ), + + TP_ARGS(xdr, requested), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, version) + __field(size_t, requested) + __field(const void *, end) + __field(const void *, p) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, len) + __string(progname, + xdr->rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + if (xdr->rqst) { + const struct rpc_task *task = xdr->rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + } else { + __entry->task_id = 0; + __entry->client_id = 0; + __assign_str(progname, "unknown") + __entry->version = 0; + __assign_str(procedure, "unknown") + } + __entry->requested = requested; + __entry->end = xdr->end; + __entry->p = xdr->p; + __entry->head_base = xdr->buf->head[0].iov_base, + __entry->head_len = xdr->buf->head[0].iov_len, + __entry->page_len = xdr->buf->page_len, + __entry->tail_base = xdr->buf->tail[0].iov_base, + __entry->tail_len = xdr->buf->tail[0].iov_len, + __entry->len = xdr->buf->len; + ), + + TP_printk( + "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + __entry->task_id, __entry->client_id, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->requested, __entry->p, __entry->end, + __entry->head_base, __entry->head_len, + __entry->page_len, + __entry->tail_base, __entry->tail_len, + __entry->len + ) +); + +TRACE_EVENT(rpc_xdr_alignment, + TP_PROTO( + const struct xdr_stream *xdr, + size_t offset, + unsigned int copied + ), + + TP_ARGS(xdr, offset, copied), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, version) + __field(size_t, offset) + __field(unsigned int, copied) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, len) + __string(progname, + xdr->rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + const struct rpc_task *task = xdr->rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + + __entry->offset = offset; + __entry->copied = copied; + __entry->head_base = xdr->buf->head[0].iov_base, + __entry->head_len = xdr->buf->head[0].iov_len, + __entry->page_len = xdr->buf->page_len, + __entry->tail_base = xdr->buf->tail[0].iov_base, + __entry->tail_len = xdr->buf->tail[0].iov_len, + __entry->len = xdr->buf->len; + ), + + TP_printk( + "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + __entry->task_id, __entry->client_id, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->offset, __entry->copied, + __entry->head_base, __entry->head_len, + __entry->page_len, + __entry->tail_base, __entry->tail_len, + __entry->len + ) +); + +TRACE_EVENT(rpc_reply_pages, + TP_PROTO( + const struct rpc_rqst *req + ), + + TP_ARGS(req), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + ), + + TP_fast_assign( + __entry->task_id = req->rq_task->tk_pid; + __entry->client_id = req->rq_task->tk_client->cl_clid; + + __entry->head_base = req->rq_rcv_buf.head[0].iov_base; + __entry->head_len = req->rq_rcv_buf.head[0].iov_len; + __entry->page_len = req->rq_rcv_buf.page_len; + __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base; + __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len; + ), + + TP_printk( + "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n", + __entry->task_id, __entry->client_id, + __entry->head_base, __entry->head_len, + __entry->page_len, + __entry->tail_base, __entry->tail_len + ) +); + /* * First define the enums in the below macros to be exported to userspace * via TRACE_DEFINE_ENUM(). @@ -404,9 +692,68 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, DEFINE_RPC_XPRT_EVENT(timer); DEFINE_RPC_XPRT_EVENT(lookup_rqst); -DEFINE_RPC_XPRT_EVENT(transmit); DEFINE_RPC_XPRT_EVENT(complete_rqst); +TRACE_EVENT(xprt_transmit, + TP_PROTO( + const struct rpc_rqst *rqst, + int status + ), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seqno) + __field(int, status) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->seqno = rqst->rq_seqno; + __entry->status = status; + ), + + TP_printk( + "task:%u@%u xid=0x%08x seqno=%u status=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->seqno, __entry->status) +); + +TRACE_EVENT(xprt_enq_xmit, + TP_PROTO( + const struct rpc_task *task, + int stage + ), + + TP_ARGS(task, stage), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seqno) + __field(int, stage) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->seqno = task->tk_rqstp->rq_seqno; + __entry->stage = stage; + ), + + TP_printk( + "task:%u@%u xid=0x%08x seqno=%u stage=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->seqno, __entry->stage) +); + TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig @@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5 If unsure, say Y. +config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES + bool "Secure RPC: Disable insecure Kerberos encryption types" + depends on RPCSEC_GSS_KRB5 + default n + help + Choose Y here to disable the use of deprecated encryption types + with the Kerberos version 5 GSS-API mechanism (RFC 1964). The + deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC, + and DES-CBC-MD4. These types were deprecated by RFC 6649 because + they were found to be insecure. + + N is the default because many sites have deployed KDCs and + keytabs that contain only these deprecated encryption types. + Choosing Y prevents the use of known-insecure encryption types + but might result in compatibility problems. + config SUNRPC_DEBUG bool "RPC: Enable dprintk debugging" depends on SUNRPC && SYSCTL diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c @@ -17,9 +17,7 @@ #include <linux/sunrpc/gss_api.h> #include <linux/spinlock.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif +#include <trace/events/sunrpc.h> #define RPC_CREDCACHE_DEFAULT_HASHBITS (4) struct rpc_cred_cache { @@ -267,8 +265,6 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size) } } rcu_read_unlock(); - - dprintk("RPC: %s returns %d\n", __func__, result); return result; } EXPORT_SYMBOL_GPL(rpcauth_list_flavors); @@ -636,9 +632,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) struct rpc_cred *ret; const struct cred *cred = current_cred(); - dprintk("RPC: looking up %s cred\n", - auth->au_ops->au_name); - memset(&acred, 0, sizeof(acred)); acred.cred = cred; ret = auth->au_ops->lookup_cred(auth, &acred, flags); @@ -670,8 +663,6 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) }; struct rpc_cred *ret; - dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); put_cred(acred.cred); return ret; @@ -688,8 +679,6 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags) if (!acred.principal) return NULL; - dprintk("RPC: %5u looking up %s machine cred\n", - task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } @@ -698,8 +687,6 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; - dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, auth->au_ops->au_name); return rpcauth_lookupcred(auth, lookupflags); } @@ -771,75 +758,102 @@ destroy: } EXPORT_SYMBOL_GPL(put_rpccred); -__be32 * -rpcauth_marshcred(struct rpc_task *task, __be32 *p) +/** + * rpcauth_marshcred - Append RPC credential to end of @xdr + * @task: controlling RPC task + * @xdr: xdr_stream containing initial portion of RPC Call header + * + * On success, an appropriate verifier is added to @xdr, @xdr is + * updated to point past the verifier, and zero is returned. + * Otherwise, @xdr is in an undefined state and a negative errno + * is returned. + */ +int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - dprintk("RPC: %5u marshaling %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - - return cred->cr_ops->crmarshal(task, p); + return ops->crmarshal(task, xdr); } -__be32 * -rpcauth_checkverf(struct rpc_task *task, __be32 *p) +/** + * rpcauth_wrap_req_encode - XDR encode the RPC procedure + * @task: controlling RPC task + * @xdr: stream where on-the-wire bytes are to be marshalled + * + * On success, @xdr contains the encoded and wrapped message. + * Otherwise, @xdr is in an undefined state. + */ +int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode; - dprintk("RPC: %5u validating %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - - return cred->cr_ops->crvalidate(task, p); + encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp); + return 0; } +EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode); -static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *data, void *obj) +/** + * rpcauth_wrap_req - XDR encode and wrap the RPC procedure + * @task: controlling RPC task + * @xdr: stream where on-the-wire bytes are to be marshalled + * + * On success, @xdr contains the encoded and wrapped message, + * and zero is returned. Otherwise, @xdr is in an undefined + * state and a negative errno is returned. + */ +int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) { - struct xdr_stream xdr; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data); - encode(rqstp, &xdr, obj); + return ops->crwrap_req(task, xdr); } +/** + * rpcauth_checkverf - Validate verifier in RPC Reply header + * @task: controlling RPC task + * @xdr: xdr_stream containing RPC Reply header + * + * On success, @xdr is updated to point past the verifier and + * zero is returned. Otherwise, @xdr is in an undefined state + * and a negative errno is returned. + */ int -rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, - __be32 *data, void *obj) +rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", - task->tk_pid, cred->cr_ops->cr_name, cred); - if (cred->cr_ops->crwrap_req) - return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); - /* By default, we encode the arguments normally. */ - rpcauth_wrap_req_encode(encode, rqstp, data, obj); - return 0; + return ops->crvalidate(task, xdr); } -static int -rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, - __be32 *data, void *obj) +/** + * rpcauth_unwrap_resp_decode - Invoke XDR decode function + * @task: controlling RPC task + * @xdr: stream where the Reply message resides + * + * Returns zero on success; otherwise a negative errno is returned. + */ +int +rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr) { - struct xdr_stream xdr; + kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data); - return decode(rqstp, &xdr, obj); + return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp); } +EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode); +/** + * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred + * @task: controlling RPC task + * @xdr: stream where the Reply message resides + * + * Returns zero on success; otherwise a negative errno is returned. + */ int -rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, - __be32 *data, void *obj) +rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", - task->tk_pid, cred->cr_ops->cr_name, cred); - if (cred->cr_ops->crunwrap_resp) - return cred->cr_ops->crunwrap_resp(task, decode, rqstp, - data, obj); - /* By default, we decode the arguments normally. */ - return rpcauth_unwrap_req_decode(decode, rqstp, data, obj); + return ops->crunwrap_resp(task, xdr); } bool @@ -865,8 +879,6 @@ rpcauth_refreshcred(struct rpc_task *task) goto out; cred = task->tk_rqstp->rq_cred; } - dprintk("RPC: %5u refreshing %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); out: @@ -880,8 +892,6 @@ rpcauth_invalcred(struct rpc_task *task) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; - dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); if (cred) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o auth_rpcgss-y := auth_gss.o gss_generic_token.o \ gss_mech_switch.o svcauth_gss.o \ - gss_rpc_upcall.o gss_rpc_xdr.o + gss_rpc_upcall.o gss_rpc_xdr.o trace.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/auth_gss/auth_gss.c * @@ -8,34 +9,8 @@ * * Dug Song <dugsong@monkey.org> * Andy Adamson <andros@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> @@ -55,6 +30,8 @@ #include "../netns.h" +#include <trace/events/rpcgss.h> + static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; @@ -260,6 +237,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct } ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS); if (ret < 0) { + trace_rpcgss_import_ctx(ret); p = ERR_PTR(ret); goto err; } @@ -275,12 +253,9 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct if (IS_ERR(p)) goto err; done: - dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n", - __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, - ctx->gc_acceptor.data); - return p; + trace_rpcgss_context(ctx->gc_expiry, now, timeout, + ctx->gc_acceptor.len, ctx->gc_acceptor.data); err: - dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; } @@ -354,10 +329,8 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth if (auth && pos->auth->service != auth->service) continue; refcount_inc(&pos->count); - dprintk("RPC: %s found msg %p\n", __func__, pos); return pos; } - dprintk("RPC: %s found nothing\n", __func__); return NULL; } @@ -456,7 +429,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, size_t buflen = sizeof(gss_msg->databuf); int len; - len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name, + len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name, from_kuid(&init_user_ns, gss_msg->uid)); buflen -= len; p += len; @@ -467,7 +440,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, * identity that we are authenticating to. */ if (target_name) { - len = scnprintf(p, buflen, "target=%s ", target_name); + len = scnprintf(p, buflen, " target=%s", target_name); buflen -= len; p += len; gss_msg->msg.len += len; @@ -487,11 +460,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, char *c = strchr(service_name, '@'); if (!c) - len = scnprintf(p, buflen, "service=%s ", + len = scnprintf(p, buflen, " service=%s", service_name); else len = scnprintf(p, buflen, - "service=%.*s srchost=%s ", + " service=%.*s srchost=%s", (int)(c - service_name), service_name, c + 1); buflen -= len; @@ -500,17 +473,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, } if (mech->gm_upcall_enctypes) { - len = scnprintf(p, buflen, "enctypes=%s ", + len = scnprintf(p, buflen, " enctypes=%s", mech->gm_upcall_enctypes); buflen -= len; p += len; gss_msg->msg.len += len; } + trace_rpcgss_upcall_msg(gss_msg->databuf); len = scnprintf(p, buflen, "\n"); if (len == 0) goto out_overflow; gss_msg->msg.len += len; - gss_msg->msg.data = gss_msg->databuf; return 0; out_overflow: @@ -603,8 +576,6 @@ gss_refresh_upcall(struct rpc_task *task) struct rpc_pipe *pipe; int err = 0; - dprintk("RPC: %5u %s for uid %u\n", - task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we @@ -612,7 +583,8 @@ gss_refresh_upcall(struct rpc_task *task) warn_gssd(); task->tk_timeout = 15*HZ; rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); - return -EAGAIN; + err = -EAGAIN; + goto out; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); @@ -635,9 +607,8 @@ gss_refresh_upcall(struct rpc_task *task) spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: - dprintk("RPC: %5u %s for uid %u result %d\n", - task->tk_pid, __func__, - from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); + trace_rpcgss_upcall_result(from_kuid(&init_user_ns, + cred->cr_cred->fsuid), err); return err; } @@ -652,14 +623,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) DEFINE_WAIT(wait); int err; - dprintk("RPC: %s for uid %u\n", - __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); retry: err = 0; /* if gssd is down, just skip upcalling altogether */ if (!gssd_running(net)) { warn_gssd(); - return -EACCES; + err = -EACCES; + goto out; } gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { @@ -700,8 +670,8 @@ out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); out: - dprintk("RPC: %s for uid %u result %d\n", - __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); + trace_rpcgss_upcall_result(from_kuid(&init_user_ns, + cred->cr_cred->fsuid), err); return err; } @@ -794,7 +764,6 @@ err_put_ctx: err: kfree(buf); out: - dprintk("RPC: %s returning %zd\n", __func__, err); return err; } @@ -863,8 +832,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->errno < 0) { - dprintk("RPC: %s releasing msg %p\n", - __func__, gss_msg); refcount_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT) @@ -1024,8 +991,6 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) struct rpc_auth * auth; int err = -ENOMEM; /* XXX? */ - dprintk("RPC: creating GSS authenticator for client %p\n", clnt); - if (!try_module_get(THIS_MODULE)) return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) @@ -1041,10 +1006,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) gss_auth->net = get_net(rpc_net_ns(clnt)); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); - if (!gss_auth->mech) { - dprintk("RPC: Pseudoflavor %d not found!\n", flavor); + if (!gss_auth->mech) goto err_put_net; - } gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; @@ -1053,6 +1016,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; + auth->au_verfsize = GSS_VERF_SLACK >> 2; + auth->au_ralign = GSS_VERF_SLACK >> 2; auth->au_flags = 0; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; @@ -1099,6 +1064,7 @@ err_free: kfree(gss_auth); out_dec: module_put(THIS_MODULE); + trace_rpcgss_createauth(flavor, err); return ERR_PTR(err); } @@ -1135,9 +1101,6 @@ gss_destroy(struct rpc_auth *auth) struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); - dprintk("RPC: destroying GSS authenticator %p flavor %d\n", - auth, auth->au_flavor); - if (hash_hashed(&gss_auth->hash)) { spin_lock(&gss_auth_hash_lock); hash_del(&gss_auth->hash); @@ -1245,7 +1208,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) struct gss_cred *new; /* Make a copy of the cred so that we can reference count it */ - new = kzalloc(sizeof(*gss_cred), GFP_NOIO); + new = kzalloc(sizeof(*gss_cred), GFP_NOFS); if (new) { struct auth_cred acred = { .cred = gss_cred->gc_base.cr_cred, @@ -1300,8 +1263,6 @@ gss_send_destroy_context(struct rpc_cred *cred) static void gss_do_free_ctx(struct gss_cl_ctx *ctx) { - dprintk("RPC: %s\n", __func__); - gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx->gc_acceptor.data); @@ -1324,7 +1285,6 @@ gss_free_ctx(struct gss_cl_ctx *ctx) static void gss_free_cred(struct gss_cred *gss_cred) { - dprintk("RPC: %s cred=%p\n", __func__, gss_cred); kfree(gss_cred); } @@ -1381,10 +1341,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t struct gss_cred *cred = NULL; int err = -ENOMEM; - dprintk("RPC: %s for uid %d, flavor %d\n", - __func__, from_kuid(&init_user_ns, acred->cred->fsuid), - auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), gfp))) goto out_err; @@ -1400,7 +1356,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t return &cred->gc_base; out_err: - dprintk("RPC: %s failed with error %d\n", __func__, err); return ERR_PTR(err); } @@ -1526,69 +1481,84 @@ out: } /* -* Marshal credentials. -* Maybe we should keep a cached credential for performance reasons. -*/ -static __be32 * -gss_marshal(struct rpc_task *task, __be32 *p) + * Marshal credentials. + * + * The expensive part is computing the verifier. We can't cache a + * pre-computed version of the verifier because the seqno, which + * is different every time, is included in the MIC. + */ +static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 *cred_len; + __be32 *p, *cred_len; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; struct xdr_buf verf_buf; + int status; - dprintk("RPC: %5u %s\n", task->tk_pid, __func__); + /* Credential */ - *p++ = htonl(RPC_AUTH_GSS); + p = xdr_reserve_space(xdr, 7 * sizeof(*p) + + ctx->gc_wire_ctx.len); + if (!p) + goto marshal_failed; + *p++ = rpc_auth_gss; cred_len = p++; spin_lock(&ctx->gc_seq_lock); req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ; spin_unlock(&ctx->gc_seq_lock); if (req->rq_seqno == MAXSEQ) - goto out_expired; + goto expired; + trace_rpcgss_seqno(task); - *p++ = htonl((u32) RPC_GSS_VERSION); - *p++ = htonl((u32) ctx->gc_proc); - *p++ = htonl((u32) req->rq_seqno); - *p++ = htonl((u32) gss_cred->gc_service); + *p++ = cpu_to_be32(RPC_GSS_VERSION); + *p++ = cpu_to_be32(ctx->gc_proc); + *p++ = cpu_to_be32(req->rq_seqno); + *p++ = cpu_to_be32(gss_cred->gc_service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); - *cred_len = htonl((p - (cred_len + 1)) << 2); + *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2); + + /* Verifier */ /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ - iov.iov_base = xprt_skip_transport_header(req->rq_xprt, - req->rq_snd_buf.head[0].iov_base); + iov.iov_base = req->rq_snd_buf.head[0].iov_base; iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); - /* set verifier flavor*/ - *p++ = htonl(RPC_AUTH_GSS); - + p = xdr_reserve_space(xdr, sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = rpc_auth_gss; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); - if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - goto out_expired; - } else if (maj_stat != 0) { - pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); - task->tk_status = -EIO; - goto out_put_ctx; - } - p = xdr_encode_opaque(p, NULL, mic.len); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + goto expired; + else if (maj_stat != 0) + goto bad_mic; + if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) + goto marshal_failed; + status = 0; +out: gss_put_ctx(ctx); - return p; -out_expired: + return status; +expired: clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - task->tk_status = -EKEYEXPIRED; -out_put_ctx: - gss_put_ctx(ctx); - return NULL; + status = -EKEYEXPIRED; + goto out; +marshal_failed: + status = -EMSGSIZE; + goto out; +bad_mic: + trace_rpcgss_get_mic(task, maj_stat); + status = -EIO; + goto out; } static int gss_renew_cred(struct rpc_task *task) @@ -1662,116 +1632,105 @@ gss_refresh_null(struct rpc_task *task) return 0; } -static __be32 * -gss_validate(struct rpc_task *task, __be32 *p) +static int +gss_validate(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 *seq = NULL; + __be32 *p, *seq = NULL; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; - u32 flav,len; - u32 maj_stat; - __be32 *ret = ERR_PTR(-EIO); + u32 len, maj_stat; + int status; - dprintk("RPC: %5u %s\n", task->tk_pid, __func__); + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + goto validate_failed; + if (*p++ != rpc_auth_gss) + goto validate_failed; + len = be32_to_cpup(p); + if (len > RPC_MAX_AUTH_SIZE) + goto validate_failed; + p = xdr_inline_decode(xdr, len); + if (!p) + goto validate_failed; - flav = ntohl(*p++); - if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) - goto out_bad; - if (flav != RPC_AUTH_GSS) - goto out_bad; seq = kmalloc(4, GFP_NOFS); if (!seq) - goto out_bad; - *seq = htonl(task->tk_rqstp->rq_seqno); + goto validate_failed; + *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); iov.iov_base = seq; iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_buf); mic.data = (u8 *)p; mic.len = len; - - ret = ERR_PTR(-EACCES); maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - if (maj_stat) { - dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n", - task->tk_pid, __func__, maj_stat); - goto out_bad; - } + if (maj_stat) + goto bad_mic; + /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; + status = 0; +out: gss_put_ctx(ctx); - dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n", - task->tk_pid, __func__); - kfree(seq); - return p + XDR_QUADLEN(len); -out_bad: - gss_put_ctx(ctx); - dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__, - PTR_ERR(ret)); kfree(seq); - return ret; -} - -static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) -{ - struct xdr_stream xdr; + return status; - xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p); - encode(rqstp, &xdr, obj); +validate_failed: + status = -EIO; + goto out; +bad_mic: + trace_rpcgss_verify_mic(task, maj_stat); + status = -EACCES; + goto out; } -static inline int -gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) +static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_task *task, struct xdr_stream *xdr) { - struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; - struct xdr_buf integ_buf; - __be32 *integ_len = NULL; + struct rpc_rqst *rqstp = task->tk_rqstp; + struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf; struct xdr_netobj mic; - u32 offset; - __be32 *q; - struct kvec *iov; - u32 maj_stat = 0; - int status = -EIO; + __be32 *p, *integ_len; + u32 offset, maj_stat; + p = xdr_reserve_space(xdr, 2 * sizeof(*p)); + if (!p) + goto wrap_failed; integ_len = p++; - offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(rqstp->rq_seqno); + *p = cpu_to_be32(rqstp->rq_seqno); - gss_wrap_req_encode(encode, rqstp, p, obj); + if (rpcauth_wrap_req_encode(task, xdr)) + goto wrap_failed; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; if (xdr_buf_subsegment(snd_buf, &integ_buf, offset, snd_buf->len - offset)) - return status; - *integ_len = htonl(integ_buf.len); + goto wrap_failed; + *integ_len = cpu_to_be32(integ_buf.len); - /* guess whether we're in the head or the tail: */ - if (snd_buf->page_len || snd_buf->tail[0].iov_len) - iov = snd_buf->tail; - else - iov = snd_buf->head; - p = iov->iov_base + iov->iov_len; + p = xdr_reserve_space(xdr, 0); + if (!p) + goto wrap_failed; mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); - status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) - return status; - q = xdr_encode_opaque(p, NULL, mic.len); - - offset = (u8 *)q - (u8 *)p; - iov->iov_len += offset; - snd_buf->len += offset; + goto bad_mic; + /* Check that the trailing MIC fit in the buffer, after the fact */ + if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) + goto wrap_failed; return 0; +wrap_failed: + return -EMSGSIZE; +bad_mic: + trace_rpcgss_get_mic(task, maj_stat); + return -EIO; } static void @@ -1822,61 +1781,62 @@ out: return -EAGAIN; } -static inline int -gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) +static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_task *task, struct xdr_stream *xdr) { + struct rpc_rqst *rqstp = task->tk_rqstp; struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; - u32 offset; - u32 maj_stat; + u32 pad, offset, maj_stat; int status; - __be32 *opaque_len; + __be32 *p, *opaque_len; struct page **inpages; int first; - int pad; struct kvec *iov; - char *tmp; + status = -EIO; + p = xdr_reserve_space(xdr, 2 * sizeof(*p)); + if (!p) + goto wrap_failed; opaque_len = p++; - offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(rqstp->rq_seqno); + *p = cpu_to_be32(rqstp->rq_seqno); - gss_wrap_req_encode(encode, rqstp, p, obj); + if (rpcauth_wrap_req_encode(task, xdr)) + goto wrap_failed; status = alloc_enc_pages(rqstp); - if (status) - return status; + if (unlikely(status)) + goto wrap_failed; first = snd_buf->page_base >> PAGE_SHIFT; inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; snd_buf->page_base -= first << PAGE_SHIFT; /* - * Give the tail its own page, in case we need extra space in the - * head when wrapping: + * Move the tail into its own page, in case gss_wrap needs + * more space in the head when wrapping. * - * call_allocate() allocates twice the slack space required - * by the authentication flavor to rq_callsize. - * For GSS, slack is GSS_CRED_SLACK. + * Still... Why can't gss_wrap just slide the tail down? */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) { + char *tmp; + tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ - BUG_ON(snd_buf->len > snd_buf->buflen); - status = -EIO; + if (unlikely(snd_buf->len > snd_buf->buflen)) + goto wrap_failed; /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) - return status; + goto bad_wrap; - *opaque_len = htonl(snd_buf->len - offset); - /* guess whether we're in the head or the tail: */ + *opaque_len = cpu_to_be32(snd_buf->len - offset); + /* guess whether the pad goes into the head or the tail: */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) iov = snd_buf->tail; else @@ -1888,118 +1848,154 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, snd_buf->len += pad; return 0; +wrap_failed: + return status; +bad_wrap: + trace_rpcgss_wrap(task, maj_stat); + return -EIO; } -static int -gss_wrap_req(struct rpc_task *task, - kxdreproc_t encode, void *rqstp, __be32 *p, void *obj) +static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - int status = -EIO; + int status; - dprintk("RPC: %5u %s\n", task->tk_pid, __func__); + status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ - gss_wrap_req_encode(encode, rqstp, p, obj); - status = 0; + status = rpcauth_wrap_req_encode(task, xdr); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - gss_wrap_req_encode(encode, rqstp, p, obj); - status = 0; + status = rpcauth_wrap_req_encode(task, xdr); break; case RPC_GSS_SVC_INTEGRITY: - status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj); + status = gss_wrap_req_integ(cred, ctx, task, xdr); break; case RPC_GSS_SVC_PRIVACY: - status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj); + status = gss_wrap_req_priv(cred, ctx, task, xdr); break; + default: + status = -EIO; } out: gss_put_ctx(ctx); - dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status); return status; } -static inline int -gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, __be32 **p) +static int +gss_unwrap_resp_auth(struct rpc_cred *cred) { - struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; - struct xdr_buf integ_buf; + struct rpc_auth *auth = cred->cr_auth; + + auth->au_rslack = auth->au_verfsize; + auth->au_ralign = auth->au_verfsize; + return 0; +} + +static int +gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, + struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, + struct xdr_stream *xdr) +{ + struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf; + u32 data_offset, mic_offset, integ_len, maj_stat; + struct rpc_auth *auth = cred->cr_auth; struct xdr_netobj mic; - u32 data_offset, mic_offset; - u32 integ_len; - u32 maj_stat; - int status = -EIO; + __be32 *p; - integ_len = ntohl(*(*p)++); + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (unlikely(!p)) + goto unwrap_failed; + integ_len = be32_to_cpup(p++); if (integ_len & 3) - return status; - data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; + goto unwrap_failed; + data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base; mic_offset = integ_len + data_offset; if (mic_offset > rcv_buf->len) - return status; - if (ntohl(*(*p)++) != rqstp->rq_seqno) - return status; - - if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, - mic_offset - data_offset)) - return status; + goto unwrap_failed; + if (be32_to_cpup(p) != rqstp->rq_seqno) + goto bad_seqno; + if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len)) + goto unwrap_failed; if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) - return status; - + goto unwrap_failed; maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) - return status; + goto bad_mic; + + auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len); + auth->au_ralign = auth->au_verfsize + 2; return 0; +unwrap_failed: + trace_rpcgss_unwrap_failed(task); + return -EIO; +bad_seqno: + trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p)); + return -EIO; +bad_mic: + trace_rpcgss_verify_mic(task, maj_stat); + return -EIO; } -static inline int -gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, __be32 **p) -{ - struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; - u32 offset; - u32 opaque_len; - u32 maj_stat; - int status = -EIO; - - opaque_len = ntohl(*(*p)++); - offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; +static int +gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, + struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, + struct xdr_stream *xdr) +{ + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; + struct kvec *head = rqstp->rq_rcv_buf.head; + struct rpc_auth *auth = cred->cr_auth; + unsigned int savedlen = rcv_buf->len; + u32 offset, opaque_len, maj_stat; + __be32 *p; + + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (unlikely(!p)) + goto unwrap_failed; + opaque_len = be32_to_cpup(p++); + offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) - return status; - /* remove padding: */ + goto unwrap_failed; rcv_buf->len = offset + opaque_len; maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) - return status; - if (ntohl(*(*p)++) != rqstp->rq_seqno) - return status; + goto bad_unwrap; + /* gss_unwrap decrypted the sequence number */ + if (be32_to_cpup(p++) != rqstp->rq_seqno) + goto bad_seqno; - return 0; -} - -static int -gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) -{ - struct xdr_stream xdr; + /* gss_unwrap redacts the opaque blob from the head iovec. + * rcv_buf has changed, thus the stream needs to be reset. + */ + xdr_init_decode(xdr, rcv_buf, p, rqstp); - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - return decode(rqstp, &xdr, obj); + auth->au_rslack = auth->au_verfsize + 2 + + XDR_QUADLEN(savedlen - rcv_buf->len); + auth->au_ralign = auth->au_verfsize + 2 + + XDR_QUADLEN(savedlen - rcv_buf->len); + return 0; +unwrap_failed: + trace_rpcgss_unwrap_failed(task); + return -EIO; +bad_seqno: + trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p)); + return -EIO; +bad_unwrap: + trace_rpcgss_unwrap(task, maj_stat); + return -EIO; } static bool @@ -2014,14 +2010,14 @@ gss_xmit_need_reencode(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 win, seq_xmit; + u32 win, seq_xmit = 0; bool ret = true; if (!ctx) - return true; + goto out; if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq))) - goto out; + goto out_ctx; seq_xmit = READ_ONCE(ctx->gc_seq_xmit); while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) { @@ -2030,56 +2026,51 @@ gss_xmit_need_reencode(struct rpc_task *task) seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno); if (seq_xmit == tmp) { ret = false; - goto out; + goto out_ctx; } } win = ctx->gc_win; if (win > 0) ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win); -out: + +out_ctx: gss_put_ctx(ctx); +out: + trace_rpcgss_need_reencode(task, seq_xmit, ret); return ret; } static int -gss_unwrap_resp(struct rpc_task *task, - kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj) +gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + struct rpc_rqst *rqstp = task->tk_rqstp; + struct rpc_cred *cred = rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 *savedp = p; - struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; - int savedlen = head->iov_len; - int status = -EIO; + int status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) goto out_decode; switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: + status = gss_unwrap_resp_auth(cred); break; case RPC_GSS_SVC_INTEGRITY: - status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); - if (status) - goto out; + status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr); break; case RPC_GSS_SVC_PRIVACY: - status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); - if (status) - goto out; + status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr); break; } - /* take into account extra slack for integrity and privacy cases: */ - cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) - + (savedlen - head->iov_len); + if (status) + goto out; + out_decode: - status = gss_unwrap_req_decode(decode, rqstp, p, obj); + status = rpcauth_unwrap_resp_decode(task, xdr); out: gss_put_ctx(ctx); - dprintk("RPC: %5u %s returning %d\n", - task->tk_pid, __func__, status); return status; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/gss_krb5_mech.c * @@ -6,32 +7,6 @@ * * Andy Adamson <andros@umich.edu> * J. Bruce Fields <bfields@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <crypto/hash.h> @@ -53,6 +28,7 @@ static struct gss_api_mech gss_kerberos_mech; /* forward declaration */ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { +#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES /* * DES (All DES enctypes are mapped to the same gss functionality) */ @@ -74,6 +50,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .cksumlength = 8, .keyed_cksum = 0, }, +#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ /* * RC4-HMAC */ diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -570,14 +570,16 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) */ movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; - BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > - buf->head[0].iov_len); + if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > + buf->head[0].iov_len) + return GSS_S_FAILURE; memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ - xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip); + buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/gss_mech_switch.c * @@ -5,32 +6,6 @@ * All rights reserved. * * J. Bruce Fields <bfields@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/types.h> diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/net/sunrpc/gss_rpc_upcall.c * * Copyright (C) 2012 Simo Sorce <simo@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/types.h> diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * linux/net/sunrpc/gss_rpc_upcall.h * * Copyright (C) 2012 Simo Sorce <simo@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _GSS_RPC_UPCALL_H @@ -45,4 +32,5 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data); void init_gssp_clnt(struct sunrpc_net *); int set_gssp_clnt(struct net *); void clear_gssp_clnt(struct sunrpc_net *); + #endif /* _GSS_RPC_UPCALL_H */ diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * GSS Proxy upcall module * * Copyright (C) 2012 Simo Sorce <simo@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/sunrpc/svcauth.h> diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * GSS Proxy upcall module * * Copyright (C) 2012 Simo Sorce <simo@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _LINUX_GSS_RPC_XDR_H @@ -262,6 +249,4 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, #define GSSX_ARG_wrap_size_limit_sz 0 #define GSSX_RES_wrap_size_limit_sz 0 - - #endif /* _LINUX_GSS_RPC_XDR_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Neil Brown <neilb@cse.unsw.edu.au> * J. Bruce Fields <bfields@umich.edu> @@ -896,7 +897,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g if (svc_getnl(&buf->head[0]) != seq) goto out; /* trim off the mic and padding at the end before returning */ - xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); + buf->len -= 4 + round_up_to_quad(mic.len); stat = 0; out: kfree(mic.data); diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, 2019 Oracle. All rights reserved. + */ + +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/gss_err.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/rpcgss.h> diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c @@ -59,15 +59,21 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) /* * Marshal credential. */ -static __be32 * -nul_marshal(struct rpc_task *task, __be32 *p) +static int +nul_marshal(struct rpc_task *task, struct xdr_stream *xdr) { - *p++ = htonl(RPC_AUTH_NULL); - *p++ = 0; - *p++ = htonl(RPC_AUTH_NULL); - *p++ = 0; - - return p; + __be32 *p; + + p = xdr_reserve_space(xdr, 4 * sizeof(*p)); + if (!p) + return -EMSGSIZE; + /* Credential */ + *p++ = rpc_auth_null; + *p++ = xdr_zero; + /* Verifier */ + *p++ = rpc_auth_null; + *p = xdr_zero; + return 0; } /* @@ -80,25 +86,19 @@ nul_refresh(struct rpc_task *task) return 0; } -static __be32 * -nul_validate(struct rpc_task *task, __be32 *p) +static int +nul_validate(struct rpc_task *task, struct xdr_stream *xdr) { - rpc_authflavor_t flavor; - u32 size; - - flavor = ntohl(*p++); - if (flavor != RPC_AUTH_NULL) { - printk("RPC: bad verf flavor: %u\n", flavor); - return ERR_PTR(-EIO); - } - - size = ntohl(*p++); - if (size != 0) { - printk("RPC: bad verf size: %u\n", size); - return ERR_PTR(-EIO); - } - - return p; + __be32 *p; + + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + return -EIO; + if (*p++ != rpc_auth_null) + return -EIO; + if (*p != xdr_zero) + return -EIO; + return 0; } const struct rpc_authops authnull_ops = { @@ -114,6 +114,8 @@ static struct rpc_auth null_auth = { .au_cslack = NUL_CALLSLACK, .au_rslack = NUL_REPLYSLACK, + .au_verfsize = NUL_REPLYSLACK, + .au_ralign = NUL_REPLYSLACK, .au_ops = &authnull_ops, .au_flavor = RPC_AUTH_NULL, .au_count = REFCOUNT_INIT(1), @@ -125,8 +127,10 @@ const struct rpc_credops null_credops = { .crdestroy = nul_destroy_cred, .crmatch = nul_match, .crmarshal = nul_marshal, + .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = nul_refresh, .crvalidate = nul_validate, + .crunwrap_resp = rpcauth_unwrap_resp_decode, }; static diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c @@ -28,8 +28,6 @@ static mempool_t *unix_pool; static struct rpc_auth * unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { - dprintk("RPC: creating UNIX authenticator for client %p\n", - clnt); refcount_inc(&unix_auth.au_count); return &unix_auth; } @@ -37,7 +35,6 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) static void unx_destroy(struct rpc_auth *auth) { - dprintk("RPC: destroying UNIX authenticator %p\n", auth); } /* @@ -48,10 +45,6 @@ unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); - dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", - from_kuid(&init_user_ns, acred->cred->fsuid), - from_kgid(&init_user_ns, acred->cred->fsgid)); - rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; return ret; @@ -61,7 +54,7 @@ static void unx_free_cred_callback(struct rcu_head *head) { struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu); - dprintk("RPC: unx_free_cred %p\n", rpc_cred); + put_cred(rpc_cred->cr_cred); mempool_free(rpc_cred, unix_pool); } @@ -87,7 +80,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid)) return 0; - if (acred->cred && acred->cred->group_info != NULL) + if (acred->cred->group_info != NULL) groups = acred->cred->group_info->ngroups; if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; @@ -106,37 +99,55 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static __be32 * -unx_marshal(struct rpc_task *task, __be32 *p) +static int +unx_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; struct rpc_cred *cred = task->tk_rqstp->rq_cred; - __be32 *base, *hold; + __be32 *p, *cred_len, *gidarr_len; int i; struct group_info *gi = cred->cr_cred->group_info; - *p++ = htonl(RPC_AUTH_UNIX); - base = p++; - *p++ = htonl(jiffies/HZ); - - /* - * Copy the UTS nodename captured when the client was created. - */ - p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - - *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid)); - hold = p++; + /* Credential */ + + p = xdr_reserve_space(xdr, 3 * sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = rpc_auth_unix; + cred_len = p++; + *p++ = xdr_zero; /* stamp */ + if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename, + clnt->cl_nodelen) < 0) + goto marshal_failed; + p = xdr_reserve_space(xdr, 3 * sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid)); + *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid)); + + gidarr_len = p++; if (gi) for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) - *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i])); - *hold = htonl(p - hold - 1); /* gid array length */ - *base = htonl((p - base - 1) << 2); /* cred length */ + *p++ = cpu_to_be32(from_kgid(&init_user_ns, + gi->gid[i])); + *gidarr_len = cpu_to_be32(p - gidarr_len - 1); + *cred_len = cpu_to_be32((p - cred_len - 1) << 2); + p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2); + if (!p) + goto marshal_failed; + + /* Verifier */ + + p = xdr_reserve_space(xdr, 2 * sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = rpc_auth_null; + *p = xdr_zero; - *p++ = htonl(RPC_AUTH_NULL); - *p++ = htonl(0); + return 0; - return p; +marshal_failed: + return -EMSGSIZE; } /* @@ -149,29 +160,35 @@ unx_refresh(struct rpc_task *task) return 0; } -static __be32 * -unx_validate(struct rpc_task *task, __be32 *p) +static int +unx_validate(struct rpc_task *task, struct xdr_stream *xdr) { - rpc_authflavor_t flavor; - u32 size; - - flavor = ntohl(*p++); - if (flavor != RPC_AUTH_NULL && - flavor != RPC_AUTH_UNIX && - flavor != RPC_AUTH_SHORT) { - printk("RPC: bad verf flavor: %u\n", flavor); - return ERR_PTR(-EIO); - } - - size = ntohl(*p++); - if (size > RPC_MAX_AUTH_SIZE) { - printk("RPC: giant verf size: %u\n", size); - return ERR_PTR(-EIO); + struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth; + __be32 *p; + u32 size; + + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + return -EIO; + switch (*p++) { + case rpc_auth_null: + case rpc_auth_unix: + case rpc_auth_short: + break; + default: + return -EIO; } - task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; - p += (size >> 2); - - return p; + size = be32_to_cpup(p); + if (size > RPC_MAX_AUTH_SIZE) + return -EIO; + p = xdr_inline_decode(xdr, size); + if (!p) + return -EIO; + + auth->au_verfsize = XDR_QUADLEN(size) + 2; + auth->au_rslack = XDR_QUADLEN(size) + 2; + auth->au_ralign = XDR_QUADLEN(size) + 2; + return 0; } int __init rpc_init_authunix(void) @@ -198,6 +215,7 @@ static struct rpc_auth unix_auth = { .au_cslack = UNX_CALLSLACK, .au_rslack = NUL_REPLYSLACK, + .au_verfsize = NUL_REPLYSLACK, .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = REFCOUNT_INIT(1), @@ -209,6 +227,8 @@ const struct rpc_credops unix_credops = { .crdestroy = unx_destroy_cred, .crmatch = unx_match, .crmarshal = unx_marshal, + .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = unx_refresh, .crvalidate = unx_validate, + .crunwrap_resp = rpcauth_unwrap_resp_decode, }; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c @@ -235,7 +235,8 @@ out: list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) +static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid, + struct rpc_rqst *new) { struct rpc_rqst *req = NULL; @@ -243,22 +244,20 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) if (atomic_read(&xprt->bc_free_slots) <= 0) goto not_found; if (list_empty(&xprt->bc_pa_list)) { - req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); - if (!req) + if (!new) goto not_found; - list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); req->rq_reply_bytes_recvd = 0; - req->rq_bytes_sent = 0; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); req->rq_xid = xid; req->rq_connect_cookie = xprt->connect_cookie; -not_found: dprintk("RPC: backchannel req=%p\n", req); +not_found: return req; } @@ -321,18 +320,27 @@ void xprt_free_bc_rqst(struct rpc_rqst *req) */ struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid) { - struct rpc_rqst *req; - - spin_lock(&xprt->bc_pa_lock); - list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { - if (req->rq_connect_cookie != xprt->connect_cookie) - continue; - if (req->rq_xid == xid) - goto found; - } - req = xprt_alloc_bc_request(xprt, xid); + struct rpc_rqst *req, *new = NULL; + + do { + spin_lock(&xprt->bc_pa_lock); + list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { + if (req->rq_connect_cookie != xprt->connect_cookie) + continue; + if (req->rq_xid == xid) + goto found; + } + req = xprt_get_bc_request(xprt, xid, new); found: - spin_unlock(&xprt->bc_pa_lock); + spin_unlock(&xprt->bc_pa_lock); + if (new) { + if (req != new) + xprt_free_bc_rqst(new); + break; + } else if (req) + break; + new = xprt_alloc_bc_req(xprt, GFP_KERNEL); + } while (new); return req; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c @@ -66,20 +66,19 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); -#if defined(CONFIG_SUNRPC_BACKCHANNEL) -static void call_bc_transmit(struct rpc_task *task); -#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); static void call_refreshresult(struct rpc_task *task); -static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 *rpc_encode_header(struct rpc_task *task); -static __be32 *rpc_verify_header(struct rpc_task *task); +static int rpc_encode_header(struct rpc_task *task, + struct xdr_stream *xdr); +static int rpc_decode_header(struct rpc_task *task, + struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); +static void rpc_check_timeout(struct rpc_task *task); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -834,9 +833,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); - if (RPC_IS_QUEUED(rovr)) - rpc_wake_up_queued_task(rovr->tk_waitqueue, - rovr); } } spin_unlock(&clnt->cl_lock); @@ -1131,6 +1127,8 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, EXPORT_SYMBOL_GPL(rpc_call_async); #if defined(CONFIG_SUNRPC_BACKCHANNEL) +static void call_bc_encode(struct rpc_task *task); + /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it @@ -1152,7 +1150,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) task = rpc_new_task(&task_setup_data); xprt_init_bc_request(req, task); - task->tk_action = call_bc_transmit; + task->tk_action = call_bc_encode; atomic_inc(&task->tk_count); WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); rpc_execute(task); @@ -1162,6 +1160,29 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +/** + * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages + * @req: RPC request to prepare + * @pages: vector of struct page pointers + * @base: offset in first page where receive should start, in bytes + * @len: expected size of the upper layer data payload, in bytes + * @hdrsize: expected size of upper layer reply header, in XDR words + * + */ +void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int hdrsize) +{ + /* Subtract one to force an extra word of buffer space for the + * payload's XDR pad to fall into the rcv_buf's tail iovec. + */ + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1; + + xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len); + trace_rpc_reply_pages(req); +} +EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages); + void rpc_call_start(struct rpc_task *task) { @@ -1519,6 +1540,7 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); + call_reserve(task); } /* @@ -1532,6 +1554,9 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); + if (rpc_task_need_resched(task)) + return; + call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1554,6 +1579,7 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; + call_refresh(task); return; } @@ -1579,6 +1605,7 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; + call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1601,6 +1628,9 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); + if (rpc_task_need_resched(task)) + return; + call_reserveresult(task); } /* @@ -1615,6 +1645,9 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); + if (rpc_task_need_resched(task)) + return; + call_refreshresult(task); } /* @@ -1633,6 +1666,7 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; + call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1651,6 +1685,7 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); + call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1665,7 +1700,7 @@ call_refreshresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack; + const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -1676,8 +1711,10 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) + if (req->rq_buffer) { + call_encode(task); return; + } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1690,15 +1727,20 @@ call_allocate(struct rpc_task *task) * and reply headers, and convert both values * to byte sizes. */ - req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen; + req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) + + proc->p_arglen; req->rq_callsize <<= 2; - req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; + req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + proc->p_replen; req->rq_rcvsize <<= 2; status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) + if (status == 0) { + if (rpc_task_need_resched(task)) + return; + call_encode(task); return; + } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1728,10 +1770,7 @@ static void rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - kxdreproc_t encode; - __be32 *p; - - dprint_status(task); + struct xdr_stream xdr; xdr_buf_init(&req->rq_snd_buf, req->rq_buffer, @@ -1740,18 +1779,13 @@ rpc_xdr_encode(struct rpc_task *task) req->rq_rbuffer, req->rq_rcvsize); - p = rpc_encode_header(task); - if (p == NULL) - return; - - encode = task->tk_msg.rpc_proc->p_encode; - if (encode == NULL) + req->rq_snd_buf.head[0].iov_len = 0; + xdr_init_encode(&xdr, &req->rq_snd_buf, + req->rq_snd_buf.head[0].iov_base, req); + if (rpc_encode_header(task, &xdr)) return; - task->tk_status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp); - if (task->tk_status == 0) - xprt_request_prepare(req); + task->tk_status = rpcauth_wrap_req(task, &xdr); } /* @@ -1762,6 +1796,7 @@ call_encode(struct rpc_task *task) { if (!rpc_task_need_encode(task)) goto out; + dprint_status(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ rpc_xdr_encode(task); /* Did the encode result in an error condition? */ @@ -1779,6 +1814,8 @@ call_encode(struct rpc_task *task) rpc_exit(task, task->tk_status); } return; + } else { + xprt_request_prepare(task->tk_rqstp); } /* Add task to reply queue before transmission to avoid races */ @@ -1787,6 +1824,25 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_transmit(task); out: task->tk_action = call_bind; + call_bind(task); +} + +/* + * Helpers to check if the task was already transmitted, and + * to take action when that is the case. + */ +static bool +rpc_task_transmitted(struct rpc_task *task) +{ + return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); +} + +static void +rpc_task_handle_transmitted(struct rpc_task *task) +{ + xprt_end_transmit(task); + task->tk_action = call_transmit_status; + call_transmit_status(task); } /* @@ -1797,14 +1853,25 @@ call_bind(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - dprint_status(task); + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } - task->tk_action = call_connect; - if (!xprt_bound(xprt)) { - task->tk_action = call_bind_status; - task->tk_timeout = xprt->bind_timeout; - xprt->ops->rpcbind(task); + if (xprt_bound(xprt)) { + task->tk_action = call_connect; + call_connect(task); + return; } + + dprint_status(task); + + task->tk_action = call_bind_status; + if (!xprt_prepare_transmit(task)) + return; + + task->tk_timeout = xprt->bind_timeout; + xprt->ops->rpcbind(task); } /* @@ -1815,10 +1882,16 @@ call_bind_status(struct rpc_task *task) { int status = -EIO; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + if (task->tk_status >= 0) { dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; + call_connect(task); return; } @@ -1841,6 +1914,8 @@ call_bind_status(struct rpc_task *task) task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; + case -EAGAIN: + goto retry_timeout; case -ETIMEDOUT: dprintk("RPC: %5u rpcbind request timed out\n", task->tk_pid); @@ -1882,7 +1957,8 @@ call_bind_status(struct rpc_task *task) retry_timeout: task->tk_status = 0; - task->tk_action = call_timeout; + task->tk_action = call_bind; + rpc_check_timeout(task); } /* @@ -1893,21 +1969,31 @@ call_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + + if (xprt_connected(xprt)) { + task->tk_action = call_transmit; + call_transmit(task); + return; + } + dprintk("RPC: %5u call_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - task->tk_action = call_transmit; - if (!xprt_connected(xprt)) { - task->tk_action = call_connect_status; - if (task->tk_status < 0) - return; - if (task->tk_flags & RPC_TASK_NOCONNECT) { - rpc_exit(task, -ENOTCONN); - return; - } - xprt_connect(task); + task->tk_action = call_connect_status; + if (task->tk_status < 0) + return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; } + if (!xprt_prepare_transmit(task)) + return; + xprt_connect(task); } /* @@ -1919,10 +2005,8 @@ call_connect_status(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; int status = task->tk_status; - /* Check if the task was already transmitted */ - if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { - xprt_end_transmit(task); - task->tk_action = call_transmit_status; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); return; } @@ -1937,8 +2021,7 @@ call_connect_status(struct rpc_task *task) break; if (clnt->cl_autobind) { rpc_force_rebind(clnt); - task->tk_action = call_bind; - return; + goto out_retry; } /* fall through */ case -ECONNRESET: @@ -1958,16 +2041,20 @@ call_connect_status(struct rpc_task *task) /* fall through */ case -ENOTCONN: case -EAGAIN: - /* Check for timeouts before looping back to call_bind */ case -ETIMEDOUT: - task->tk_action = call_timeout; - return; + goto out_retry; case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; + call_transmit(task); return; } rpc_exit(task, status); + return; +out_retry: + /* Check for timeouts before looping back to call_bind */ + task->tk_action = call_bind; + rpc_check_timeout(task); } /* @@ -1976,16 +2063,28 @@ call_connect_status(struct rpc_task *task) static void call_transmit(struct rpc_task *task) { + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + dprint_status(task); + task->tk_action = call_transmit_status; + if (!xprt_prepare_transmit(task)) + return; task->tk_status = 0; if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { - if (!xprt_prepare_transmit(task)) + if (!xprt_connected(task->tk_xprt)) { + task->tk_status = -ENOTCONN; return; + } xprt_transmit(task); } - task->tk_action = call_transmit_status; xprt_end_transmit(task); + if (rpc_task_need_resched(task)) + return; + call_transmit_status(task); } /* @@ -2000,8 +2099,12 @@ call_transmit_status(struct rpc_task *task) * Common case: success. Force the compiler to put this * test first. */ - if (task->tk_status == 0) { - xprt_request_wait_receive(task); + if (rpc_task_transmitted(task)) { + if (task->tk_status == 0) + xprt_request_wait_receive(task); + if (rpc_task_need_resched(task)) + return; + call_status(task); return; } @@ -2038,7 +2141,7 @@ call_transmit_status(struct rpc_task *task) trace_xprt_ping(task->tk_xprt, task->tk_status); rpc_exit(task, task->tk_status); - break; + return; } /* fall through */ case -ECONNRESET: @@ -2046,11 +2149,25 @@ call_transmit_status(struct rpc_task *task) case -EADDRINUSE: case -ENOTCONN: case -EPIPE: + task->tk_action = call_bind; + task->tk_status = 0; break; } + rpc_check_timeout(task); } #if defined(CONFIG_SUNRPC_BACKCHANNEL) +static void call_bc_transmit(struct rpc_task *task); +static void call_bc_transmit_status(struct rpc_task *task); + +static void +call_bc_encode(struct rpc_task *task) +{ + xprt_request_enqueue_transmit(task); + task->tk_action = call_bc_transmit; + call_bc_transmit(task); +} + /* * 5b. Send the backchannel RPC reply. On error, drop the reply. In * addition, disconnect on connectivity errors. @@ -2058,26 +2175,23 @@ call_transmit_status(struct rpc_task *task) static void call_bc_transmit(struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; - - if (rpc_task_need_encode(task)) - xprt_request_enqueue_transmit(task); - if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - goto out_wakeup; - - if (!xprt_prepare_transmit(task)) - goto out_retry; - - if (task->tk_status < 0) { - printk(KERN_NOTICE "RPC: Could not send backchannel reply " - "error: %d\n", task->tk_status); - goto out_done; + task->tk_action = call_bc_transmit_status; + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { + if (!xprt_prepare_transmit(task)) + return; + task->tk_status = 0; + xprt_transmit(task); } + xprt_end_transmit(task); +} - xprt_transmit(task); +static void +call_bc_transmit_status(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; - xprt_end_transmit(task); dprint_status(task); + switch (task->tk_status) { case 0: /* Success */ @@ -2091,8 +2205,14 @@ call_bc_transmit(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOBUFS: + rpc_delay(task, HZ>>2); + /* fall through */ + case -EBADSLT: case -EAGAIN: - goto out_retry; + task->tk_status = 0; + task->tk_action = call_bc_transmit; + return; case -ETIMEDOUT: /* * Problem reaching the server. Disconnect and let the @@ -2111,18 +2231,11 @@ call_bc_transmit(struct rpc_task *task) * We were unable to reply and will have to drop the * request. The server should reconnect and retransmit. */ - WARN_ON_ONCE(task->tk_status == -EAGAIN); printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); break; } -out_wakeup: - rpc_wake_up_queued_task(&req->rq_xprt->pending, task); -out_done: task->tk_action = rpc_exit_task; - return; -out_retry: - task->tk_status = 0; } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ @@ -2143,6 +2256,7 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; + call_decode(task); return; } @@ -2154,10 +2268,8 @@ call_status(struct rpc_task *task) case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: - if (RPC_IS_SOFTCONN(task)) { - rpc_exit(task, status); - break; - } + if (RPC_IS_SOFTCONN(task)) + goto out_exit; /* * Delay any retries for 3 seconds, then handle as if it * were a timeout. @@ -2165,7 +2277,6 @@ call_status(struct rpc_task *task) rpc_delay(task, 3*HZ); /* fall through */ case -ETIMEDOUT: - task->tk_action = call_timeout; break; case -ECONNREFUSED: case -ECONNRESET: @@ -2178,34 +2289,30 @@ call_status(struct rpc_task *task) case -EPIPE: case -ENOTCONN: case -EAGAIN: - task->tk_action = call_encode; break; case -EIO: /* shutdown or soft timeout */ - rpc_exit(task, status); - break; + goto out_exit; default: if (clnt->cl_chatty) printk("%s: RPC call returned error %d\n", clnt->cl_program->name, -status); - rpc_exit(task, status); + goto out_exit; } + task->tk_action = call_encode; + rpc_check_timeout(task); + return; +out_exit: + rpc_exit(task, status); } -/* - * 6a. Handle RPC timeout - * We do not release the request slot, so we keep using the - * same XID for all retransmits. - */ static void -call_timeout(struct rpc_task *task) +rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - if (xprt_adjust_timeout(task->tk_rqstp) == 0) { - dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid); - goto retry; - } + if (xprt_adjust_timeout(task->tk_rqstp) == 0) + return; dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; @@ -2241,10 +2348,6 @@ call_timeout(struct rpc_task *task) * event? RFC2203 requires the server to drop all such requests. */ rpcauth_invalcred(task); - -retry: - task->tk_action = call_encode; - task->tk_status = 0; } /* @@ -2255,12 +2358,11 @@ call_decode(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; - __be32 *p; + struct xdr_stream xdr; dprint_status(task); - if (!decode) { + if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = rpc_exit_task; return; } @@ -2285,223 +2387,195 @@ call_decode(struct rpc_task *task) WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, sizeof(req->rq_rcv_buf)) != 0); - if (req->rq_rcv_buf.len < 12) { - if (!RPC_IS_SOFT(task)) { - task->tk_action = call_encode; - goto out_retry; - } - dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", - clnt->cl_program->name, task->tk_status); - task->tk_action = call_timeout; + if (req->rq_rcv_buf.len < 12) goto out_retry; - } - p = rpc_verify_header(task); - if (IS_ERR(p)) { - if (p == ERR_PTR(-EAGAIN)) - goto out_retry; + xdr_init_decode(&xdr, &req->rq_rcv_buf, + req->rq_rcv_buf.head[0].iov_base, req); + switch (rpc_decode_header(task, &xdr)) { + case 0: + task->tk_action = rpc_exit_task; + task->tk_status = rpcauth_unwrap_resp(task, &xdr); + dprintk("RPC: %5u %s result %d\n", + task->tk_pid, __func__, task->tk_status); return; - } - task->tk_action = rpc_exit_task; - - task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, - task->tk_msg.rpc_resp); - - dprintk("RPC: %5u call_decode result %d\n", task->tk_pid, - task->tk_status); - return; + case -EAGAIN: out_retry: - task->tk_status = 0; - /* Note: rpc_verify_header() may have freed the RPC slot */ - if (task->tk_rqstp == req) { - xdr_free_bvec(&req->rq_rcv_buf); - req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; - if (task->tk_client->cl_discrtry) - xprt_conditional_disconnect(req->rq_xprt, - req->rq_connect_cookie); + task->tk_status = 0; + /* Note: rpc_decode_header() may have freed the RPC slot */ + if (task->tk_rqstp == req) { + xdr_free_bvec(&req->rq_rcv_buf); + req->rq_reply_bytes_recvd = 0; + req->rq_rcv_buf.len = 0; + if (task->tk_client->cl_discrtry) + xprt_conditional_disconnect(req->rq_xprt, + req->rq_connect_cookie); + } + task->tk_action = call_encode; + rpc_check_timeout(task); } } -static __be32 * -rpc_encode_header(struct rpc_task *task) +static int +rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - __be32 *p = req->rq_svec[0].iov_base; - - /* FIXME: check buffer size? */ - - p = xprt_skip_transport_header(req->rq_xprt, p); - *p++ = req->rq_xid; /* XID */ - *p++ = htonl(RPC_CALL); /* CALL */ - *p++ = htonl(RPC_VERSION); /* RPC version */ - *p++ = htonl(clnt->cl_prog); /* program number */ - *p++ = htonl(clnt->cl_vers); /* program version */ - *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ - p = rpcauth_marshcred(task, p); - if (p) - req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); - return p; + __be32 *p; + int error; + + error = -EMSGSIZE; + p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2); + if (!p) + goto out_fail; + *p++ = req->rq_xid; + *p++ = rpc_call; + *p++ = cpu_to_be32(RPC_VERSION); + *p++ = cpu_to_be32(clnt->cl_prog); + *p++ = cpu_to_be32(clnt->cl_vers); + *p = cpu_to_be32(task->tk_msg.rpc_proc->p_proc); + + error = rpcauth_marshcred(task, xdr); + if (error < 0) + goto out_fail; + return 0; +out_fail: + trace_rpc_bad_callhdr(task); + rpc_exit(task, error); + return error; } -static __be32 * -rpc_verify_header(struct rpc_task *task) +static noinline int +rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; - struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; - int len = task->tk_rqstp->rq_rcv_buf.len >> 2; - __be32 *p = iov->iov_base; - u32 n; int error = -EACCES; + __be32 *p; - if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { - /* RFC-1014 says that the representation of XDR data must be a - * multiple of four bytes - * - if it isn't pointer subtraction in the NFS client may give - * undefined results - */ - dprintk("RPC: %5u %s: XDR representation not a multiple of" - " 4 bytes: 0x%x\n", task->tk_pid, __func__, - task->tk_rqstp->rq_rcv_buf.len); - error = -EIO; - goto out_err; - } - if ((len -= 3) < 0) - goto out_overflow; - - p += 1; /* skip XID */ - if ((n = ntohl(*p++)) != RPC_REPLY) { - dprintk("RPC: %5u %s: not an RPC reply: %x\n", - task->tk_pid, __func__, n); - error = -EIO; - goto out_garbage; - } - - if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { - if (--len < 0) - goto out_overflow; - switch ((n = ntohl(*p++))) { - case RPC_AUTH_ERROR: - break; - case RPC_MISMATCH: - dprintk("RPC: %5u %s: RPC call version mismatch!\n", - task->tk_pid, __func__); - error = -EPROTONOSUPPORT; - goto out_err; - default: - dprintk("RPC: %5u %s: RPC call rejected, " - "unknown error: %x\n", - task->tk_pid, __func__, n); - error = -EIO; - goto out_err; - } - if (--len < 0) - goto out_overflow; - switch ((n = ntohl(*p++))) { - case RPC_AUTH_REJECTEDCRED: - case RPC_AUTH_REJECTEDVERF: - case RPCSEC_GSS_CREDPROBLEM: - case RPCSEC_GSS_CTXPROBLEM: - if (!task->tk_cred_retry) - break; - task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry stale creds\n", - task->tk_pid, __func__); - rpcauth_invalcred(task); - /* Ensure we obtain a new XID! */ - xprt_release(task); - task->tk_action = call_reserve; - goto out_retry; - case RPC_AUTH_BADCRED: - case RPC_AUTH_BADVERF: - /* possibly garbled cred/verf? */ - if (!task->tk_garb_retry) - break; - task->tk_garb_retry--; - dprintk("RPC: %5u %s: retry garbled creds\n", - task->tk_pid, __func__); - task->tk_action = call_encode; - goto out_retry; - case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "RPC: server %s requires stronger " - "authentication.\n", - task->tk_xprt->servername); - break; - default: - dprintk("RPC: %5u %s: unknown auth error: %x\n", - task->tk_pid, __func__, n); - error = -EIO; - } - dprintk("RPC: %5u %s: call rejected %d\n", - task->tk_pid, __func__, n); - goto out_err; - } - p = rpcauth_checkverf(task, p); - if (IS_ERR(p)) { - error = PTR_ERR(p); - dprintk("RPC: %5u %s: auth check failed with %d\n", - task->tk_pid, __func__, error); - goto out_garbage; /* bad verifier, retry */ - } - len = p - (__be32 *)iov->iov_base - 1; - if (len < 0) - goto out_overflow; - switch ((n = ntohl(*p++))) { - case RPC_SUCCESS: - return p; - case RPC_PROG_UNAVAIL: - dprintk("RPC: %5u %s: program %u is unsupported " - "by server %s\n", task->tk_pid, __func__, - (unsigned int)clnt->cl_prog, - task->tk_xprt->servername); + /* RFC-1014 says that the representation of XDR data must be a + * multiple of four bytes + * - if it isn't pointer subtraction in the NFS client may give + * undefined results + */ + if (task->tk_rqstp->rq_rcv_buf.len & 3) + goto out_badlen; + + p = xdr_inline_decode(xdr, 3 * sizeof(*p)); + if (!p) + goto out_unparsable; + p++; /* skip XID */ + if (*p++ != rpc_reply) + goto out_unparsable; + if (*p++ != rpc_msg_accepted) + goto out_msg_denied; + + error = rpcauth_checkverf(task, xdr); + if (error) + goto out_verifier; + + p = xdr_inline_decode(xdr, sizeof(*p)); + if (!p) + goto out_unparsable; + switch (*p) { + case rpc_success: + return 0; + case rpc_prog_unavail: + trace_rpc__prog_unavail(task); error = -EPFNOSUPPORT; goto out_err; - case RPC_PROG_MISMATCH: - dprintk("RPC: %5u %s: program %u, version %u unsupported " - "by server %s\n", task->tk_pid, __func__, - (unsigned int)clnt->cl_prog, - (unsigned int)clnt->cl_vers, - task->tk_xprt->servername); + case rpc_prog_mismatch: + trace_rpc__prog_mismatch(task); error = -EPROTONOSUPPORT; goto out_err; - case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %s unsupported by program %u, " - "version %u on server %s\n", - task->tk_pid, __func__, - rpc_proc_name(task), - clnt->cl_prog, clnt->cl_vers, - task->tk_xprt->servername); + case rpc_proc_unavail: + trace_rpc__proc_unavail(task); error = -EOPNOTSUPP; goto out_err; - case RPC_GARBAGE_ARGS: - dprintk("RPC: %5u %s: server saw garbage\n", - task->tk_pid, __func__); - break; /* retry */ + case rpc_garbage_args: + trace_rpc__garbage_args(task); + break; default: - dprintk("RPC: %5u %s: server accept status: %x\n", - task->tk_pid, __func__, n); - /* Also retry */ + trace_rpc__unparsable(task); } out_garbage: clnt->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; - dprintk("RPC: %5u %s: retrying\n", - task->tk_pid, __func__); task->tk_action = call_encode; -out_retry: - return ERR_PTR(-EAGAIN); + return -EAGAIN; } out_err: rpc_exit(task, error); - dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, - __func__, error); - return ERR_PTR(error); -out_overflow: - dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid, - __func__); + return error; + +out_badlen: + trace_rpc__unparsable(task); + error = -EIO; + goto out_err; + +out_unparsable: + trace_rpc__unparsable(task); + error = -EIO; + goto out_garbage; + +out_verifier: + trace_rpc_bad_verifier(task); goto out_garbage; + +out_msg_denied: + p = xdr_inline_decode(xdr, sizeof(*p)); + if (!p) + goto out_unparsable; + switch (*p++) { + case rpc_auth_error: + break; + case rpc_mismatch: + trace_rpc__mismatch(task); + error = -EPROTONOSUPPORT; + goto out_err; + default: + trace_rpc__unparsable(task); + error = -EIO; + goto out_err; + } + + p = xdr_inline_decode(xdr, sizeof(*p)); + if (!p) + goto out_unparsable; + switch (*p++) { + case rpc_autherr_rejectedcred: + case rpc_autherr_rejectedverf: + case rpcsec_gsserr_credproblem: + case rpcsec_gsserr_ctxproblem: + if (!task->tk_cred_retry) + break; + task->tk_cred_retry--; + trace_rpc__stale_creds(task); + rpcauth_invalcred(task); + /* Ensure we obtain a new XID! */ + xprt_release(task); + task->tk_action = call_reserve; + return -EAGAIN; + case rpc_autherr_badcred: + case rpc_autherr_badverf: + /* possibly garbled cred/verf? */ + if (!task->tk_garb_retry) + break; + task->tk_garb_retry--; + trace_rpc__bad_creds(task); + task->tk_action = call_encode; + return -EAGAIN; + case rpc_autherr_tooweak: + trace_rpc__auth_tooweak(task); + pr_warn("RPC: server %s requires stronger authentication.\n", + task->tk_xprt->servername); + break; + default: + trace_rpc__unparsable(task); + error = -EIO; + } + goto out_err; } static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/freezer.h> +#include <linux/sched/mm.h> #include <linux/sunrpc/clnt.h> @@ -784,8 +785,7 @@ void rpc_exit(struct rpc_task *task, int status) { task->tk_status = status; task->tk_action = rpc_exit_task; - if (RPC_IS_QUEUED(task)) - rpc_wake_up_queued_task(task->tk_waitqueue, task); + rpc_wake_up_queued_task(task->tk_waitqueue, task); } EXPORT_SYMBOL_GPL(rpc_exit); @@ -902,7 +902,10 @@ void rpc_execute(struct rpc_task *task) static void rpc_async_schedule(struct work_struct *work) { + unsigned int pflags = memalloc_nofs_save(); + __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + memalloc_nofs_restore(pflags); } /** @@ -921,16 +924,13 @@ static void rpc_async_schedule(struct work_struct *work) * Most requests are 'small' (under 2KiB) and can be serviced from a * mempool, ensuring that NFS reads and writes can always proceed, * and that there is good locality of reference for these buffers. - * - * In order to avoid memory starvation triggering more writebacks of - * NFS requests, we avoid using GFP_KERNEL. */ int rpc_malloc(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; - gfp_t gfp = GFP_NOIO | __GFP_NOWARN; + gfp_t gfp = GFP_NOFS; if (RPC_IS_SWAPPER(task)) gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; @@ -1011,7 +1011,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } /* @@ -1067,7 +1067,10 @@ static void rpc_free_task(struct rpc_task *task) static void rpc_async_release(struct work_struct *work) { + unsigned int pflags = memalloc_nofs_save(); + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); + memalloc_nofs_restore(pflags); } static void rpc_release_resources_task(struct rpc_task *task) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c @@ -1145,17 +1145,6 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, .. #endif /* - * Setup response header for TCP, it has a 4B record length field. - */ -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) -{ - struct kvec *resv = &rqstp->rq_res.head[0]; - - /* tcp needs a space for the record length... */ - svc_putnl(resv, 0); -} - -/* * Common routine for processing the RPC request. */ static int @@ -1182,10 +1171,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); clear_bit(RQ_DROPME, &rqstp->rq_flags); - /* Setup reply header */ - if (rqstp->rq_prot == IPPROTO_TCP) - svc_tcp_prep_reply_hdr(rqstp); - svc_putu32(resv, rqstp->rq_xid); vers = svc_getnl(argv); @@ -1443,6 +1428,10 @@ svc_process(struct svc_rqst *rqstp) goto out_drop; } + /* Reserve space for the record marker */ + if (rqstp->rq_prot == IPPROTO_TCP) + svc_putnl(resv, 0); + /* Returns 1 for send, 0 for drop */ if (likely(svc_process_common(rqstp, argv, resv))) return svc_send(rqstp); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c @@ -16,6 +16,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/msg_prot.h> #include <linux/bvec.h> +#include <trace/events/sunrpc.h> /* * XDR functions for basic NFS types @@ -162,6 +163,15 @@ xdr_free_bvec(struct xdr_buf *buf) buf->bvec = NULL; } +/** + * xdr_inline_pages - Prepare receive buffer for a large reply + * @xdr: xdr_buf into which reply will be placed + * @offset: expected offset where data payload will start, in bytes + * @pages: vector of struct page pointers + * @base: offset in first page where receive should start, in bytes + * @len: expected size of the upper layer data payload, in bytes + * + */ void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, struct page **pages, unsigned int base, unsigned int len) @@ -179,6 +189,8 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, tail->iov_base = buf + offset; tail->iov_len = buflen - offset; + if ((xdr->page_len & 3) == 0) + tail->iov_len -= sizeof(__be32); xdr->buflen += len; } @@ -346,13 +358,15 @@ EXPORT_SYMBOL_GPL(_copy_from_pages); * 'len' bytes. The extra data is not lost, but is instead * moved into the inlined pages and/or the tail. */ -static void +static unsigned int xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) { struct kvec *head, *tail; size_t copy, offs; unsigned int pglen = buf->page_len; + unsigned int result; + result = 0; tail = buf->tail; head = buf->head; @@ -366,6 +380,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) copy = tail->iov_len - len; memmove((char *)tail->iov_base + len, tail->iov_base, copy); + result += copy; } /* Copy from the inlined pages into the tail */ copy = len; @@ -376,11 +391,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) copy = 0; else if (copy > tail->iov_len - offs) copy = tail->iov_len - offs; - if (copy != 0) + if (copy != 0) { _copy_from_pages((char *)tail->iov_base + offs, buf->pages, buf->page_base + pglen + offs - len, copy); + result += copy; + } /* Do we also need to copy data from the head into the tail ? */ if (len > pglen) { offs = copy = len - pglen; @@ -390,6 +407,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) (char *)head->iov_base + head->iov_len - offs, copy); + result += copy; } } /* Now handle pages */ @@ -405,12 +423,15 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) _copy_to_pages(buf->pages, buf->page_base, (char *)head->iov_base + head->iov_len - len, copy); + result += copy; } head->iov_len -= len; buf->buflen -= len; /* Have we truncated the message? */ if (buf->len > buf->buflen) buf->len = buf->buflen; + + return result; } /** @@ -422,14 +443,16 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) * 'len' bytes. The extra data is not lost, but is instead * moved into the tail. */ -static void +static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { struct kvec *tail; size_t copy; unsigned int pglen = buf->page_len; unsigned int tailbuf_len; + unsigned int result; + result = 0; tail = buf->tail; BUG_ON (len > pglen); @@ -447,18 +470,22 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) if (tail->iov_len > len) { char *p = (char *)tail->iov_base + len; memmove(p, tail->iov_base, tail->iov_len - len); + result += tail->iov_len - len; } else copy = tail->iov_len; /* Copy from the inlined pages into the tail */ _copy_from_pages((char *)tail->iov_base, buf->pages, buf->page_base + pglen - len, copy); + result += copy; } buf->page_len -= len; buf->buflen -= len; /* Have we truncated the message? */ if (buf->len > buf->buflen) buf->len = buf->buflen; + + return result; } void @@ -483,6 +510,7 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos); * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer in which to encode data * @p: current pointer inside XDR buffer + * @rqst: pointer to controlling rpc_rqst, for debugging * * Note: at the moment the RPC client only passes the length of our * scratch buffer in the xdr_buf's header kvec. Previously this @@ -491,7 +519,8 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos); * of the buffer length, and takes care of adjusting the kvec * length for us. */ -void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) +void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, + struct rpc_rqst *rqst) { struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; @@ -513,6 +542,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) buf->len += len; iov->iov_len += len; } + xdr->rqst = rqst; } EXPORT_SYMBOL_GPL(xdr_init_encode); @@ -551,9 +581,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, int frag1bytes, frag2bytes; if (nbytes > PAGE_SIZE) - return NULL; /* Bigger buffers require special handling */ + goto out_overflow; /* Bigger buffers require special handling */ if (xdr->buf->len + nbytes > xdr->buf->buflen) - return NULL; /* Sorry, we're totally out of space */ + goto out_overflow; /* Sorry, we're totally out of space */ frag1bytes = (xdr->end - xdr->p) << 2; frag2bytes = nbytes - frag1bytes; if (xdr->iov) @@ -582,6 +612,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, xdr->buf->page_len += frag2bytes; xdr->buf->len += nbytes; return p; +out_overflow: + trace_rpc_xdr_overflow(xdr, nbytes); + return NULL; } /** @@ -819,8 +852,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer from which to decode data * @p: current pointer inside XDR buffer + * @rqst: pointer to controlling rpc_rqst, for debugging */ -void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) +void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, + struct rpc_rqst *rqst) { xdr->buf = buf; xdr->scratch.iov_base = NULL; @@ -836,6 +871,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) xdr->nwords -= p - xdr->p; xdr->p = p; } + xdr->rqst = rqst; } EXPORT_SYMBOL_GPL(xdr_init_decode); @@ -854,7 +890,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, buf->page_len = len; buf->buflen = len; buf->len = len; - xdr_init_decode(xdr, buf, NULL); + xdr_init_decode(xdr, buf, NULL, NULL); } EXPORT_SYMBOL_GPL(xdr_init_decode_pages); @@ -896,20 +932,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) size_t cplen = (char *)xdr->end - (char *)xdr->p; if (nbytes > xdr->scratch.iov_len) - return NULL; + goto out_overflow; p = __xdr_inline_decode(xdr, cplen); if (p == NULL) return NULL; memcpy(cpdest, p, cplen); + if (!xdr_set_next_buffer(xdr)) + goto out_overflow; cpdest += cplen; nbytes -= cplen; - if (!xdr_set_next_buffer(xdr)) - return NULL; p = __xdr_inline_decode(xdr, nbytes); if (p == NULL) return NULL; memcpy(cpdest, p, nbytes); return xdr->scratch.iov_base; +out_overflow: + trace_rpc_xdr_overflow(xdr, nbytes); + return NULL; } /** @@ -926,14 +965,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; - if (nbytes == 0) + if (unlikely(nbytes == 0)) return xdr->p; if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) - return NULL; + goto out_overflow; p = __xdr_inline_decode(xdr, nbytes); if (p != NULL) return p; return xdr_copy_to_scratch(xdr, nbytes); +out_overflow: + trace_rpc_xdr_overflow(xdr, nbytes); + return NULL; } EXPORT_SYMBOL_GPL(xdr_inline_decode); @@ -943,13 +985,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) struct kvec *iov; unsigned int nwords = XDR_QUADLEN(len); unsigned int cur = xdr_stream_pos(xdr); + unsigned int copied, offset; if (xdr->nwords == 0) return 0; + /* Realign pages to current pointer position */ - iov = buf->head; + iov = buf->head; if (iov->iov_len > cur) { - xdr_shrink_bufhead(buf, iov->iov_len - cur); + offset = iov->iov_len - cur; + copied = xdr_shrink_bufhead(buf, offset); + trace_rpc_xdr_alignment(xdr, offset, copied); xdr->nwords = XDR_QUADLEN(buf->len - cur); } @@ -961,7 +1007,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) len = buf->page_len; else if (nwords < xdr->nwords) { /* Truncate page data and move it into the tail */ - xdr_shrink_pagelen(buf, buf->page_len - len); + offset = buf->page_len - len; + copied = xdr_shrink_pagelen(buf, offset); + trace_rpc_xdr_alignment(xdr, offset, copied); xdr->nwords = XDR_QUADLEN(buf->len - cur); } return len; @@ -1102,47 +1150,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); -/** - * xdr_buf_trim - lop at most "len" bytes off the end of "buf" - * @buf: buf to be trimmed - * @len: number of bytes to reduce "buf" by - * - * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note - * that it's possible that we'll trim less than that amount if the xdr_buf is - * too small, or if (for instance) it's all in the head and the parser has - * already read too far into it. - */ -void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) -{ - size_t cur; - unsigned int trim = len; - - if (buf->tail[0].iov_len) { - cur = min_t(size_t, buf->tail[0].iov_len, trim); - buf->tail[0].iov_len -= cur; - trim -= cur; - if (!trim) - goto fix_len; - } - - if (buf->page_len) { - cur = min_t(unsigned int, buf->page_len, trim); - buf->page_len -= cur; - trim -= cur; - if (!trim) - goto fix_len; - } - - if (buf->head[0].iov_len) { - cur = min_t(size_t, buf->head[0].iov_len, trim); - buf->head[0].iov_len -= cur; - trim -= cur; - } -fix_len: - buf->len -= (len - trim); -} -EXPORT_SYMBOL_GPL(xdr_buf_trim); - static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c @@ -49,6 +49,7 @@ #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> #include <linux/rcupdate.h> +#include <linux/sched/mm.h> #include <trace/events/sunrpc.h> @@ -643,11 +644,13 @@ static void xprt_autoclose(struct work_struct *work) { struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); + unsigned int pflags = memalloc_nofs_save(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); wake_up_bit(&xprt->state, XPRT_LOCKED); + memalloc_nofs_restore(pflags); } /** @@ -1165,6 +1168,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); + trace_xprt_enq_xmit(task, 1); goto out; } } else if (RPC_IS_SWAPPER(task)) { @@ -1176,6 +1180,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); + trace_xprt_enq_xmit(task, 2); goto out; } } else if (!req->rq_seqno) { @@ -1184,11 +1189,13 @@ xprt_request_enqueue_transmit(struct rpc_task *task) continue; list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); INIT_LIST_HEAD(&req->rq_xmit); + trace_xprt_enq_xmit(task, 3); goto out; } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); + trace_xprt_enq_xmit(task, 4); out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1313,8 +1320,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) int is_retrans = RPC_WAS_SENT(task); int status; - dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_bytes_sent) { if (xprt_request_data_received(task)) { status = 0; @@ -1325,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) status = -EBADMSG; goto out_dequeue; } + if (task->tk_ops->rpc_call_prepare_transmit) { + task->tk_ops->rpc_call_prepare_transmit(task, + task->tk_calldata); + status = task->tk_status; + if (status < 0) + goto out_dequeue; + } } /* @@ -1336,9 +1348,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(req); - trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { req->rq_ntrans--; + trace_xprt_transmit(req, status); return status; } @@ -1347,7 +1359,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) xprt_inject_disconnect(xprt); - dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); @@ -1360,6 +1371,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) req->rq_connect_cookie = connect_cookie; out_dequeue: + trace_xprt_transmit(req, status); xprt_request_dequeue_transmit(task); rpc_wake_up_queued_task_set_status(&xprt->sending, task, status); return status; @@ -1599,7 +1611,6 @@ xprt_request_init(struct rpc_task *task) req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); xprt_init_connect_cookie(req, xprt); - req->rq_bytes_sent = 0; req->rq_snd_buf.len = 0; req->rq_snd_buf.buflen = 0; req->rq_rcv_buf.len = 0; @@ -1721,6 +1732,7 @@ void xprt_release(struct rpc_task *task) xprt->ops->buf_free(task); xprt_inject_disconnect(xprt); xdr_free_bvec(&req->rq_rcv_buf); + xdr_free_bvec(&req->rq_snd_buf); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; @@ -1749,7 +1761,6 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; - req->rq_bytes_sent = 0; } #endif diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c @@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf, - req->rl_rdmabuf->rg_base); + req->rl_rdmabuf->rg_base, rqst); p = xdr_reserve_space(&req->rl_stream, 28); if (unlikely(!p)) @@ -267,7 +267,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, /* Prepare rqst */ rqst->rq_reply_bytes_recvd = 0; - rqst->rq_bytes_sent = 0; rqst->rq_xid = *p; rqst->rq_private_buf.len = size; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c @@ -391,7 +391,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) */ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, u32 xid, + int nsegs, bool writing, __be32 xid, struct rpcrdma_mr **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; @@ -446,7 +446,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, goto out_mapmr_err; ibmr->iova &= 0x00000000ffffffff; - ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32; + ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32; key = (u8)(ibmr->rkey & 0x000000FF); ib_update_fast_reg_key(ibmr, ++key); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -164,6 +164,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; } +/* The client is required to provide a Reply chunk if the maximum + * size of the non-payload part of the RPC Reply is larger than + * the inline threshold. + */ +static bool +rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt, + const struct rpc_rqst *rqst) +{ + const struct xdr_buf *buf = &rqst->rq_rcv_buf; + const struct rpcrdma_ia *ia = &r_xprt->rx_ia; + + return buf->head[0].iov_len + buf->tail[0].iov_len < + ia->ri_max_inline_read; +} + /* Split @vec on page boundaries into SGEs. FMR registers pages, not * a byte range. Other modes coalesce these SGEs into a single MR * when they can. @@ -733,7 +748,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(xdr, &req->rl_hdrbuf, - req->rl_rdmabuf->rg_base); + req->rl_rdmabuf->rg_base, rqst); /* Fixed header fields */ ret = -EMSGSIZE; @@ -762,7 +777,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) */ if (rpcrdma_results_inline(r_xprt, rqst)) wtype = rpcrdma_noch; - else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) + else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) && + rpcrdma_nonpayload_inline(r_xprt, rqst)) wtype = rpcrdma_writech; else wtype = rpcrdma_replych; @@ -1313,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) /* Fixed transport header fields */ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, - rep->rr_hdrbuf.head[0].iov_base); + rep->rr_hdrbuf.head[0].iov_base, NULL); p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); if (unlikely(!p)) goto out_shortreply; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -304,7 +304,6 @@ xprt_setup_rdma_bc(struct xprt_create *args) xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->prot = XPRT_TRANSPORT_BC_RDMA; - xprt->tsh_size = 0; xprt->ops = &xprt_rdma_bc_procs; memcpy(&xprt->addr, args->dstaddr, args->addrlen); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c @@ -332,7 +332,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->resvport = 0; /* privileged port not needed */ - xprt->tsh_size = 0; /* RPC-RDMA handles framing */ xprt->ops = &xprt_rdma_procs; /* @@ -738,7 +737,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; - rqst->rq_bytes_sent = 0; /* An RPC with no reply will throw off credit accounting, * so drop the connection to reset the credit grant. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c @@ -1481,6 +1481,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (ep->rep_receive_count > needed) goto out; needed -= ep->rep_receive_count; + if (!temp) + needed += RPCRDMA_MAX_RECV_BATCH; count = 0; wr = NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -205,6 +205,16 @@ struct rpcrdma_rep { struct ib_recv_wr rr_recv_wr; }; +/* To reduce the rate at which a transport invokes ib_post_recv + * (and thus the hardware doorbell rate), xprtrdma posts Receive + * WRs in batches. + * + * Setting this to zero disables Receive post batching. + */ +enum { + RPCRDMA_MAX_RECV_BATCH = 7, +}; + /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes */ struct rpcrdma_req; @@ -577,7 +587,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr); size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, u32 xid, + int nsegs, bool writing, __be32 xid, struct rpcrdma_mr **mr); int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c @@ -50,6 +50,7 @@ #include <linux/bvec.h> #include <linux/highmem.h> #include <linux/uio.h> +#include <linux/sched/mm.h> #include <trace/events/sunrpc.h> @@ -404,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, size_t want, seek_init = seek, offset = 0; ssize_t ret; - if (seek < buf->head[0].iov_len) { - want = min_t(size_t, count, buf->head[0].iov_len); + want = min_t(size_t, count, buf->head[0].iov_len); + if (seek < want) { ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek); if (ret <= 0) goto sock_err; @@ -416,13 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, goto out; seek = 0; } else { - seek -= buf->head[0].iov_len; - offset += buf->head[0].iov_len; + seek -= want; + offset += want; } want = xs_alloc_sparse_pages(buf, min_t(size_t, count - offset, buf->page_len), - GFP_NOWAIT); + GFP_KERNEL); if (seek < want) { ret = xs_read_bvec(sock, msg, flags, buf->bvec, xdr_buf_pagecount(buf), @@ -442,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, offset += want; } - if (seek < buf->tail[0].iov_len) { - want = min_t(size_t, count - offset, buf->tail[0].iov_len); + want = min_t(size_t, count - offset, buf->tail[0].iov_len); + if (seek < want) { ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); if (ret <= 0) goto sock_err; @@ -453,7 +454,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (ret != want) goto out; } else - offset += buf->tail[0].iov_len; + offset = seek_init; ret = -EMSGSIZE; out: *read = offset - seek_init; @@ -481,6 +482,14 @@ xs_read_stream_request_done(struct sock_xprt *transport) return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); } +static void +xs_read_stream_check_eor(struct sock_xprt *transport, + struct msghdr *msg) +{ + if (xs_read_stream_request_done(transport)) + msg->msg_flags |= MSG_EOR; +} + static ssize_t xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, int flags, struct rpc_rqst *req) @@ -492,17 +501,21 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, xs_read_header(transport, buf); want = transport->recv.len - transport->recv.offset; - ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, - transport->recv.copied + want, transport->recv.copied, - &read); - transport->recv.offset += read; - transport->recv.copied += read; - if (transport->recv.offset == transport->recv.len) { - if (xs_read_stream_request_done(transport)) - msg->msg_flags |= MSG_EOR; - return read; + if (want != 0) { + ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, + transport->recv.copied + want, + transport->recv.copied, + &read); + transport->recv.offset += read; + transport->recv.copied += read; } + if (transport->recv.offset == transport->recv.len) + xs_read_stream_check_eor(transport, msg); + + if (want == 0) + return 0; + switch (ret) { default: break; @@ -655,13 +668,35 @@ out_err: return ret != 0 ? ret : -ESHUTDOWN; } +static __poll_t xs_poll_socket(struct sock_xprt *transport) +{ + return transport->sock->ops->poll(transport->file, transport->sock, + NULL); +} + +static bool xs_poll_socket_readable(struct sock_xprt *transport) +{ + __poll_t events = xs_poll_socket(transport); + + return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP); +} + +static void xs_poll_check_readable(struct sock_xprt *transport) +{ + + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + if (!xs_poll_socket_readable(transport)) + return; + if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) + queue_work(xprtiod_workqueue, &transport->recv_worker); +} + static void xs_stream_data_receive(struct sock_xprt *transport) { size_t read = 0; ssize_t ret = 0; mutex_lock(&transport->recv_mutex); - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); if (transport->sock == NULL) goto out; for (;;) { @@ -671,6 +706,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport) read += ret; cond_resched(); } + if (ret == -ESHUTDOWN) + kernel_sock_shutdown(transport->sock, SHUT_RDWR); + else + xs_poll_check_readable(transport); out: mutex_unlock(&transport->recv_mutex); trace_xs_stream_read_data(&transport->xprt, ret, read); @@ -680,7 +719,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, recv_worker); + unsigned int pflags = memalloc_nofs_save(); + xs_stream_data_receive(transport); + memalloc_nofs_restore(pflags); } static void @@ -690,65 +732,65 @@ xs_stream_reset_connect(struct sock_xprt *transport) transport->recv.len = 0; transport->recv.copied = 0; transport->xmit.offset = 0; +} + +static void +xs_stream_start_connect(struct sock_xprt *transport) +{ transport->xprt.stat.connect_count++; transport->xprt.stat.connect_start = jiffies; } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) -static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) +static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek) { - struct msghdr msg = { - .msg_name = addr, - .msg_namelen = addrlen, - .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0), - }; - struct kvec iov = { - .iov_base = vec->iov_base + base, - .iov_len = vec->iov_len - base, - }; + if (seek) + iov_iter_advance(&msg->msg_iter, seek); + return sock_sendmsg(sock, msg); +} - if (iov.iov_len != 0) - return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - return kernel_sendmsg(sock, &msg, NULL, 0, 0); +static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek) +{ + iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len); + return xs_sendmsg(sock, msg, seek); } -static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p) +static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) { - ssize_t (*do_sendpage)(struct socket *sock, struct page *page, - int offset, size_t size, int flags); - struct page **ppage; - unsigned int remainder; int err; - remainder = xdr->page_len - base; - base += xdr->page_base; - ppage = xdr->pages + (base >> PAGE_SHIFT); - base &= ~PAGE_MASK; - do_sendpage = sock->ops->sendpage; - if (!zerocopy) - do_sendpage = sock_no_sendpage; - for(;;) { - unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); - int flags = XS_SENDMSG_FLAGS; + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + return err; - remainder -= len; - if (more) - flags |= MSG_MORE; - if (remainder != 0) - flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE; - err = do_sendpage(sock, *ppage, base, len, flags); - if (remainder == 0 || err != len) - break; - *sent_p += err; - ppage++; - base = 0; - } - if (err > 0) { - *sent_p += err; - err = 0; - } - return err; + iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, + xdr_buf_pagecount(xdr), + xdr->page_len + xdr->page_base); + return xs_sendmsg(sock, msg, base + xdr->page_base); +} + +#define xs_record_marker_len() sizeof(rpc_fraghdr) + +/* Common case: + * - stream transport + * - sending from byte 0 of the message + * - the message is wholly contained in @xdr's head iovec + */ +static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg, + rpc_fraghdr marker, struct kvec *vec, size_t base) +{ + struct kvec iov[2] = { + [0] = { + .iov_base = &marker, + .iov_len = sizeof(marker) + }, + [1] = *vec, + }; + size_t len = iov[0].iov_len + iov[1].iov_len; + + iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len); + return xs_sendmsg(sock, msg, base); } /** @@ -758,49 +800,60 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i * @addrlen: UDP only -- length of destination address * @xdr: buffer containing this request * @base: starting position in the buffer - * @zerocopy: true if it is safe to use sendpage() + * @rm: stream record marker field * @sent_p: return the total number of bytes successfully queued for sending * */ -static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p) +static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p) { - unsigned int remainder = xdr->len - base; + struct msghdr msg = { + .msg_name = addr, + .msg_namelen = addrlen, + .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE, + }; + unsigned int rmsize = rm ? sizeof(rm) : 0; + unsigned int remainder = rmsize + xdr->len - base; + unsigned int want; int err = 0; - int sent = 0; if (unlikely(!sock)) return -ENOTSOCK; - if (base != 0) { - addr = NULL; - addrlen = 0; - } - - if (base < xdr->head[0].iov_len || addr != NULL) { - unsigned int len = xdr->head[0].iov_len - base; + want = xdr->head[0].iov_len + rmsize; + if (base < want) { + unsigned int len = want - base; remainder -= len; - err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); + if (remainder == 0) + msg.msg_flags &= ~MSG_MORE; + if (rmsize) + err = xs_send_rm_and_kvec(sock, &msg, rm, + &xdr->head[0], base); + else + err = xs_send_kvec(sock, &msg, &xdr->head[0], base); if (remainder == 0 || err != len) goto out; *sent_p += err; base = 0; } else - base -= xdr->head[0].iov_len; + base -= want; if (base < xdr->page_len) { unsigned int len = xdr->page_len - base; remainder -= len; - err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent); - *sent_p += sent; - if (remainder == 0 || sent != len) + if (remainder == 0) + msg.msg_flags &= ~MSG_MORE; + err = xs_send_pagedata(sock, &msg, xdr, base); + if (remainder == 0 || err != len) goto out; + *sent_p += err; base = 0; } else base -= xdr->page_len; if (base >= xdr->tail[0].iov_len) return 0; - err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); + msg.msg_flags &= ~MSG_MORE; + err = xs_send_kvec(sock, &msg, &xdr->tail[0], base); out: if (err > 0) { *sent_p += err; @@ -856,7 +909,7 @@ static int xs_nospace(struct rpc_rqst *req) static void xs_stream_prepare_request(struct rpc_rqst *req) { - req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO); + req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL); } /* @@ -870,13 +923,14 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req) } /* - * Construct a stream transport record marker in @buf. + * Return the stream record marker field for a record of length < 2^31-1 */ -static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) +static rpc_fraghdr +xs_stream_record_marker(struct xdr_buf *xdr) { - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); + if (!xdr->len) + return 0; + return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); } /** @@ -905,15 +959,14 @@ static int xs_local_send_request(struct rpc_rqst *req) return -ENOTCONN; } - xs_encode_stream_record_marker(&req->rq_snd_buf); - xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, - true, &sent); + xs_stream_record_marker(xdr), + &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); @@ -925,7 +978,6 @@ static int xs_local_send_request(struct rpc_rqst *req) req->rq_bytes_sent = transport->xmit.offset; if (likely(req->rq_bytes_sent >= req->rq_slen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; - req->rq_bytes_sent = 0; transport->xmit.offset = 0; return 0; } @@ -981,7 +1033,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, - xdr, 0, true, &sent); + xdr, 0, 0, &sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len, status); @@ -1045,7 +1097,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - bool zerocopy = true; bool vm_wait = false; int status; int sent; @@ -1057,17 +1108,9 @@ static int xs_tcp_send_request(struct rpc_rqst *req) return -ENOTCONN; } - xs_encode_stream_record_marker(&req->rq_snd_buf); - xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); - /* Don't use zero copy if this is a resend. If the RPC call - * completes while the socket holds a reference to the pages, - * then we may end up resending corrupted data. - */ - if (req->rq_task->tk_flags & RPC_TASK_SENT) - zerocopy = false; if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) xs_tcp_set_socket_timeouts(xprt, transport->sock); @@ -1080,7 +1123,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, - zerocopy, &sent); + xs_stream_record_marker(xdr), + &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - transport->xmit.offset, status); @@ -1091,7 +1135,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req) req->rq_bytes_sent = transport->xmit.offset; if (likely(req->rq_bytes_sent >= req->rq_slen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; - req->rq_bytes_sent = 0; transport->xmit.offset = 0; return 0; } @@ -1211,6 +1254,7 @@ static void xs_reset_transport(struct sock_xprt *transport) struct socket *sock = transport->sock; struct sock *sk = transport->inet; struct rpc_xprt *xprt = &transport->xprt; + struct file *filp = transport->file; if (sk == NULL) return; @@ -1224,6 +1268,7 @@ static void xs_reset_transport(struct sock_xprt *transport) write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; + transport->file = NULL; sk->sk_user_data = NULL; @@ -1231,10 +1276,12 @@ static void xs_reset_transport(struct sock_xprt *transport) xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); + /* Reset stream record info */ + xs_stream_reset_connect(transport); mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - sock_release(sock); + fput(filp); xprt_disconnect_done(xprt); } @@ -1358,7 +1405,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport) int err; mutex_lock(&transport->recv_mutex); - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); sk = transport->inet; if (sk == NULL) goto out; @@ -1370,6 +1416,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) consume_skb(skb); cond_resched(); } + xs_poll_check_readable(transport); out: mutex_unlock(&transport->recv_mutex); } @@ -1378,7 +1425,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, recv_worker); + unsigned int pflags = memalloc_nofs_save(); + xs_udp_data_receive(transport); + memalloc_nofs_restore(pflags); } /** @@ -1826,6 +1876,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, struct sock_xprt *transport, int family, int type, int protocol, bool reuseport) { + struct file *filp; struct socket *sock; int err; @@ -1846,6 +1897,11 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); + if (IS_ERR(filp)) + return ERR_CAST(filp); + transport->file = filp; + return sock; out: return ERR_PTR(err); @@ -1869,7 +1925,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_write_space = xs_udp_write_space; sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; - sk->sk_allocation = GFP_NOIO; xprt_clear_connected(xprt); @@ -1880,7 +1935,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, write_unlock_bh(&sk->sk_callback_lock); } - xs_stream_reset_connect(transport); + xs_stream_start_connect(transport); return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); } @@ -1892,6 +1947,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, static int xs_local_setup_socket(struct sock_xprt *transport) { struct rpc_xprt *xprt = &transport->xprt; + struct file *filp; struct socket *sock; int status = -EIO; @@ -1904,6 +1960,13 @@ static int xs_local_setup_socket(struct sock_xprt *transport) } xs_reclassify_socket(AF_LOCAL, sock); + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); + if (IS_ERR(filp)) { + status = PTR_ERR(filp); + goto out; + } + transport->file = filp; + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); @@ -2057,7 +2120,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; sock_set_flag(sk, SOCK_FASYNC); - sk->sk_allocation = GFP_NOIO; xprt_set_connected(xprt); @@ -2220,7 +2282,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_write_space = xs_tcp_write_space; sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; - sk->sk_allocation = GFP_NOIO; /* socket options */ sock_reset_flag(sk, SOCK_LINGER); @@ -2240,8 +2301,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); - /* Reset TCP record info */ - xs_stream_reset_connect(transport); + xs_stream_start_connect(transport); /* Tell the socket layer to start connecting... */ set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); @@ -2534,26 +2594,35 @@ static int bc_sendto(struct rpc_rqst *req) { int len; struct xdr_buf *xbufp = &req->rq_snd_buf; - struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; + container_of(req->rq_xprt, struct sock_xprt, xprt); unsigned long headoff; unsigned long tailoff; + struct page *tailpage; + struct msghdr msg = { + .msg_flags = MSG_MORE + }; + rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | + (u32)xbufp->len); + struct kvec iov = { + .iov_base = &marker, + .iov_len = sizeof(marker), + }; - xs_encode_stream_record_marker(xbufp); + len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len); + if (len != iov.iov_len) + return -EAGAIN; + tailpage = NULL; + if (xbufp->tail[0].iov_len) + tailpage = virt_to_page(xbufp->tail[0].iov_base); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; - len = svc_send_common(sock, xbufp, + len = svc_send_common(transport->sock, xbufp, virt_to_page(xbufp->head[0].iov_base), headoff, - xbufp->tail[0].iov_base, tailoff); - - if (len != xbufp->len) { - printk(KERN_NOTICE "Error sending entire callback!\n"); - len = -EAGAIN; - } - + tailpage, tailoff); + if (len != xbufp->len) + return -EAGAIN; return len; } @@ -2793,7 +2862,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = 0; - xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; @@ -2862,7 +2930,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_UDP; - xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -2942,7 +3009,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_TCP; - xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; @@ -3015,7 +3081,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_TCP; - xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->timeout = &xs_tcp_default_timeout;