whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 9931a07d518e86eb58a75e508ed9626f86359303
parent e468f5c06b5ebef3f6f3c187e51aa6daab667e57
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  1 Nov 2018 19:58:52 -0700

Merge branch 'work.afs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull AFS updates from Al Viro:
 "AFS series, with some iov_iter bits included"

* 'work.afs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (26 commits)
  missing bits of "iov_iter: Separate type from direction and use accessor functions"
  afs: Probe multiple fileservers simultaneously
  afs: Fix callback handling
  afs: Eliminate the address pointer from the address list cursor
  afs: Allow dumping of server cursor on operation failure
  afs: Implement YFS support in the fs client
  afs: Expand data structure fields to support YFS
  afs: Get the target vnode in afs_rmdir() and get a callback on it
  afs: Calc callback expiry in op reply delivery
  afs: Fix FS.FetchStatus delivery from updating wrong vnode
  afs: Implement the YFS cache manager service
  afs: Remove callback details from afs_callback_break struct
  afs: Commit the status on a new file/dir/symlink
  afs: Increase to 64-bit volume ID and 96-bit vnode ID for YFS
  afs: Don't invoke the server to read data beyond EOF
  afs: Add a couple of tracepoints to log I/O errors
  afs: Handle EIO from delivery function
  afs: Fix TTL on VL server and address lists
  afs: Implement VL server rotation
  afs: Improve FS server rotation error handling
  ...

Diffstat:
Mblock/bio.c | 2+-
Mdrivers/block/drbd/drbd_main.c | 2+-
Mdrivers/block/drbd/drbd_receiver.c | 2+-
Mdrivers/block/loop.c | 9++++-----
Mdrivers/block/nbd.c | 12+++++-------
Mdrivers/fsi/fsi-sbefifo.c | 4++--
Mdrivers/gpu/drm/amd/display/dc/os_types.h | 2--
Mdrivers/isdn/mISDN/l1oip_core.c | 3+--
Mdrivers/misc/vmw_vmci/vmci_queue_pair.c | 6+++---
Mdrivers/nvme/target/io-cmd-file.c | 2+-
Mdrivers/target/iscsi/iscsi_target_util.c | 6++----
Mdrivers/target/target_core_file.c | 6+++---
Mdrivers/usb/usbip/usbip_common.c | 2+-
Mdrivers/xen/pvcalls-back.c | 8++++----
Mfs/9p/vfs_addr.c | 4++--
Mfs/9p/vfs_dir.c | 2+-
Mfs/9p/xattr.c | 4++--
Mfs/afs/Kconfig | 12++++++++++++
Mfs/afs/Makefile | 7++++++-
Mfs/afs/addr_list.c | 209+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Mfs/afs/afs.h | 50++++++++++++++++++++++++++------------------------
Mfs/afs/cache.c | 2+-
Mfs/afs/callback.c | 17++++++++---------
Mfs/afs/cell.c | 65++++++++++++++++++++++++++++++++++++++++++-----------------------
Mfs/afs/cmservice.c | 287++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Mfs/afs/dir.c | 75+++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mfs/afs/dynroot.c | 4++--
Mfs/afs/file.c | 8++++----
Mfs/afs/flock.c | 22+++++++++++-----------
Afs/afs/fs_probe.c | 270+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/afs/fsclient.c | 583+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Mfs/afs/inode.c | 37++++++++++++++++++++-----------------
Mfs/afs/internal.h | 322++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Mfs/afs/mntpt.c | 5+++--
Mfs/afs/proc.c | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Afs/afs/protocol_yfs.h | 163+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/afs/rotate.c | 302+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mfs/afs/rxrpc.c | 115++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Mfs/afs/security.c | 13+++++++------
Mfs/afs/server.c | 145+++++++++++--------------------------------------------------------------------
Mfs/afs/server_list.c | 6+++---
Mfs/afs/super.c | 5+++--
Afs/afs/vl_list.c | 340+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afs/afs/vl_probe.c | 273+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afs/afs/vl_rotate.c | 355+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/afs/vlclient.c | 195++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mfs/afs/volume.c | 56++++++++++----------------------------------------------
Mfs/afs/write.c | 30+++++++++++++++++++++---------
Mfs/afs/xattr.c | 2+-
Afs/afs/yfsclient.c | 2184+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/block_dev.c | 2+-
Mfs/ceph/file.c | 9++++-----
Mfs/cifs/connect.c | 4++--
Mfs/cifs/file.c | 4++--
Mfs/cifs/misc.c | 4++--
Mfs/cifs/smb2ops.c | 4++--
Mfs/cifs/smbdirect.c | 17+++++++++++++----
Mfs/cifs/transport.c | 8+++-----
Mfs/direct-io.c | 2+-
Mfs/dlm/lowcomms.c | 2+-
Mfs/fuse/file.c | 2+-
Mfs/iomap.c | 2+-
Mfs/nfsd/vfs.c | 4++--
Mfs/ocfs2/cluster/tcp.c | 2+-
Mfs/orangefs/inode.c | 2+-
Mfs/splice.c | 7+++----
Minclude/linux/uio.h | 65+++++++++++++++++++++++++++++++++++++++++++++--------------------
Minclude/trace/events/afs.h | 213++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Mlib/iov_iter.c | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Mmm/filemap.c | 2+-
Mmm/page_io.c | 2+-
Mnet/9p/client.c | 2+-
Mnet/9p/trans_virtio.c | 2+-
Mnet/bluetooth/6lowpan.c | 2+-
Mnet/bluetooth/a2mp.c | 2+-
Mnet/bluetooth/smp.c | 2+-
Mnet/ceph/messenger.c | 6+++---
Mnet/netfilter/ipvs/ip_vs_sync.c | 2+-
Mnet/smc/smc_clc.c | 4++--
Mnet/socket.c | 6+++---
Mnet/sunrpc/svcsock.c | 2+-
Mnet/sunrpc/xprtsock.c | 4++--
Mnet/tipc/topsrv.c | 2+-
Mnet/tls/tls_device.c | 4++--
Mnet/tls/tls_sw.c | 4++--
85 files changed, 5632 insertions(+), 1239 deletions(-)

diff --git a/block/bio.c b/block/bio.c @@ -1256,7 +1256,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || + if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { ret = bio_copy_from_iter(bio, iter); if (ret) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c @@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, /* THINK if (signal_pending) return ... ? */ - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size); if (sock == connection->data.socket) { rcu_read_lock(); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c @@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag struct msghdr msg = { .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) }; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); return sock_recvmsg(sock, &msg, msg.msg_flags); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c @@ -269,7 +269,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) struct iov_iter i; ssize_t bw; - iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len); + iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); file_start_write(file); bw = vfs_iter_write(file, &i, ppos, 0); @@ -347,7 +347,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, ssize_t len; rq_for_each_segment(bvec, rq, iter) { - iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); + iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) return len; @@ -388,7 +388,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq, b.bv_offset = 0; b.bv_len = bvec.bv_len; - iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len); + iov_iter_bvec(&i, READ, &b, 1, b.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) { ret = len; @@ -555,8 +555,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, } atomic_set(&cmd->ref, 2); - iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, - segments, blk_rq_bytes(rq)); + iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq)); iter.iov_offset = offset; cmd->iocb.ki_pos = pos; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c @@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) u32 nbd_cmd_flags = 0; int sent = nsock->sent, skip = 0; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); switch (req_op(req)) { case REQ_OP_DISCARD: @@ -564,8 +564,7 @@ send_pages: dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", req, bvec.bv_len); - iov_iter_bvec(&from, ITER_BVEC | WRITE, - &bvec, 1, bvec.bv_len); + iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len); if (skip) { if (skip >= iov_iter_count(&from)) { skip -= iov_iter_count(&from); @@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) int ret = 0; reply.magic = 0; - iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); + iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { if (!nbd_disconnected(config)) @@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - iov_iter_bvec(&to, ITER_BVEC | READ, - &bvec, 1, bvec.bv_len); + iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", @@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd) for (i = 0; i < config->num_connections; i++) { struct nbd_sock *nsock = config->socks[i]; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); mutex_lock(&nsock->tx_lock); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c @@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo) } ffdc_iov.iov_base = ffdc; ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE; - iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE); + iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE); cmd[0] = cpu_to_be32(2); cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC); rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter); @@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len, rbytes = (*resp_len) * sizeof(__be32); resp_iov.iov_base = response; resp_iov.iov_len = rbytes; - iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes); + iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes); /* Perform the command */ mutex_lock(&sbefifo->lock); diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -40,8 +40,6 @@ #define LITTLEENDIAN_CPU #endif -#undef READ -#undef WRITE #undef FRAME_SIZE #define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__) diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c @@ -718,8 +718,7 @@ l1oip_socket_thread(void *data) printk(KERN_DEBUG "%s: socket created and open\n", __func__); while (!signal_pending(current)) { - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, - recvbuf_size); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size); recvlen = sock_recvmsg(socket, &msg, 0); if (recvlen > 0) { l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size); + iov_iter_kvec(&from, WRITE, &v, 1, buf_size); qp_lock(qpair); @@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; - iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size); + iov_iter_kvec(&to, READ, &v, 1, buf_size); qp_lock(qpair); @@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair, if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; - iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size); + iov_iter_kvec(&to, READ, &v, 1, buf_size); qp_lock(qpair); diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c @@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, rw = READ; } - iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count); + iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count); iocb->ki_pos = pos; iocb->ki_filp = req->ns->file; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c @@ -1245,8 +1245,7 @@ static int iscsit_do_rx_data( return -1; memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, - count->iov, count->iov_count, data); + iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data); while (msg_data_left(&msg)) { rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL); @@ -1302,8 +1301,7 @@ int tx_data( memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, - iov, iov_count, data); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data); while (msg_data_left(&msg)) { int tx_loop = sock_sendmsg(conn->sock, &msg); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c @@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, len += sg->length; } - iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len); + iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len); aio_cmd->cmd = cmd; aio_cmd->len = len; @@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd, len += sg->length; } - iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len); + iov_iter_bvec(&iter, READ, bvec, sgl_nents, len); if (is_write) ret = vfs_iter_write(fd, &iter, &pos, 0); else @@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd) len += se_dev->dev_attrib.block_size; } - iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len); + iov_iter_bvec(&iter, READ, bvec, nolb, len); ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0); kfree(bvec); diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c @@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size) if (!sock || !buf || !size) return -EINVAL; - iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); usbip_dbg_xmit("enter\n"); diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c @@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque) if (masked_prod < masked_cons) { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = wanted; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted); + iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted); } else { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = array_size - masked_prod; vec[1].iov_base = data->in; vec[1].iov_len = wanted - vec[0].iov_len; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted); + iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted); } atomic_set(&map->read, 0); @@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = size; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size); + iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size); } else { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = array_size - pvcalls_mask(cons, array_size); vec[1].iov_base = data->out; vec[1].iov_len = size - vec[0].iov_len; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size); + iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size); } atomic_set(&map->write, 0); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c @@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) if (retval == 0) return retval; - iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE); + iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE); retval = p9_client_read(fid, page_offset(page), &to, &err); if (err) { @@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page) bvec.bv_page = page; bvec.bv_offset = 0; bvec.bv_len = len; - iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len); + iov_iter_bvec(&from, WRITE, &bvec, 1, len); /* We should have writeback_fid always set */ BUG_ON(!v9inode->writeback_fid); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c @@ -123,7 +123,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) if (rdir->tail == rdir->head) { struct iov_iter to; int n; - iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen); + iov_iter_kvec(&to, READ, &kvec, 1, buflen); n = p9_client_read(file->private_data, ctx->pos, &to, &err); if (err) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c @@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, struct iov_iter to; int err; - iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size); + iov_iter_kvec(&to, READ, &kvec, 1, buffer_size); attr_fid = p9_client_xattrwalk(fid, name, &attr_size); if (IS_ERR(attr_fid)) { @@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, struct iov_iter from; int retval, err; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len); + iov_iter_kvec(&from, WRITE, &kvec, 1, value_len); p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", name, value_len, flags); diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig @@ -27,3 +27,15 @@ config AFS_FSCACHE help Say Y here if you want AFS data to be cached locally on disk through the generic filesystem cache manager + +config AFS_DEBUG_CURSOR + bool "AFS server cursor debugging" + depends on AFS_FS + help + Say Y here to cause the contents of a server cursor to be dumped to + the dmesg log if the server rotation algorithm fails to successfully + contact a server. + + See <file:Documentation/filesystems/afs.txt> for more information. + + If unsure, say N. diff --git a/fs/afs/Makefile b/fs/afs/Makefile @@ -17,6 +17,7 @@ kafs-y := \ file.o \ flock.o \ fsclient.o \ + fs_probe.o \ inode.o \ main.o \ misc.o \ @@ -29,9 +30,13 @@ kafs-y := \ super.o \ netdevices.o \ vlclient.o \ + vl_list.o \ + vl_probe.o \ + vl_rotate.o \ volume.o \ write.o \ - xattr.o + xattr.o \ + yfsclient.o kafs-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c @@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, /* * Parse a text string consisting of delimited addresses. */ -struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, - char delim, - unsigned short service, - unsigned short port) +struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, + const char *text, size_t len, + char delim, + unsigned short service, + unsigned short port) { + struct afs_vlserver_list *vllist; struct afs_addr_list *alist; const char *p, *end = text + len; + const char *problem; unsigned int nr = 0; + int ret = -ENOMEM; _enter("%*.*s,%c", (int)len, (int)len, text, delim); - if (!len) + if (!len) { + _leave(" = -EDESTADDRREQ [empty]"); return ERR_PTR(-EDESTADDRREQ); + } if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) delim = ','; @@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, /* Count the addresses */ p = text; do { - if (!*p) - return ERR_PTR(-EINVAL); + if (!*p) { + problem = "nul"; + goto inval; + } if (*p == delim) continue; nr++; if (*p == '[') { p++; - if (p == end) - return ERR_PTR(-EINVAL); + if (p == end) { + problem = "brace1"; + goto inval; + } p = memchr(p, ']', end - p); - if (!p) - return ERR_PTR(-EINVAL); + if (!p) { + problem = "brace2"; + goto inval; + } p++; if (p >= end) break; @@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); - alist = afs_alloc_addrlist(nr, service, port); - if (!alist) + vllist = afs_alloc_vlserver_list(1); + if (!vllist) return ERR_PTR(-ENOMEM); + vllist->nr_servers = 1; + vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT); + if (!vllist->servers[0].server) + goto error_vl; + + alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT); + if (!alist) + goto error; + /* Extract the addresses */ p = text; do { @@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, break; } - if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) + if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) { family = AF_INET; - else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) + } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) { family = AF_INET6; - else + } else { + problem = "family"; goto bad_address; + } - if (stop != q) + p = q; + if (stop != p) { + problem = "nostop"; goto bad_address; + } - p = q; if (q < end && *q == ']') p++; @@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, /* Port number specification "+1234" */ xport = 0; p++; - if (p >= end || !isdigit(*p)) + if (p >= end || !isdigit(*p)) { + problem = "port"; goto bad_address; + } do { xport *= 10; xport += *p - '0'; - if (xport > 65535) + if (xport > 65535) { + problem = "pval"; goto bad_address; + } p++; } while (p < end && isdigit(*p)); } else if (*p == delim) { p++; } else { + problem = "weird"; goto bad_address; } } @@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, } while (p < end); + rcu_assign_pointer(vllist->servers[0].server->addresses, alist); _leave(" = [nr %u]", alist->nr_addrs); - return alist; + return vllist; -bad_address: - kfree(alist); +inval: + _leave(" = -EINVAL [%s %zu %*.*s]", + problem, p - text, (int)len, (int)len, text); return ERR_PTR(-EINVAL); +bad_address: + _leave(" = -EINVAL [%s %zu %*.*s]", + problem, p - text, (int)len, (int)len, text); + ret = -EINVAL; +error: + afs_put_addrlist(alist); +error_vl: + afs_put_vlserverlist(net, vllist); + return ERR_PTR(ret); } /* @@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1, /* * Perform a DNS query for VL servers and build a up an address list. */ -struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) +struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) { - struct afs_addr_list *alist; - char *vllist = NULL; + struct afs_vlserver_list *vllist; + char *result = NULL; int ret; _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, - "", &vllist, _expiry); - if (ret < 0) + ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", + &result, _expiry); + if (ret < 0) { + _leave(" = %d [dns]", ret); return ERR_PTR(ret); - - alist = afs_parse_text_addrs(vllist, strlen(vllist), ',', - VL_SERVICE, AFS_VL_PORT); - if (IS_ERR(alist)) { - kfree(vllist); - if (alist != ERR_PTR(-ENOMEM)) - pr_err("Failed to parse DNS data\n"); - return alist; } - kfree(vllist); - return alist; + if (*_expiry == 0) + *_expiry = ktime_get_real_seconds() + 60; + + if (ret > 1 && result[0] == 0) + vllist = afs_extract_vlserver_list(cell, result, ret); + else + vllist = afs_parse_text_addrs(cell->net, result, ret, ',', + VL_SERVICE, AFS_VL_PORT); + kfree(result); + if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM)) + pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist)); + + return vllist; } /* @@ -258,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin); srx->transport.sin.sin_family = AF_INET; srx->transport.sin.sin_port = htons(port); @@ -296,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); srx->transport.sin6.sin6_family = AF_INET6; srx->transport.sin6.sin6_port = htons(port); @@ -308,25 +357,33 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) */ bool afs_iterate_addresses(struct afs_addr_cursor *ac) { - _enter("%hu+%hd", ac->start, (short)ac->index); + unsigned long set, failed; + int index; if (!ac->alist) return false; - if (ac->begun) { - ac->index++; - if (ac->index == ac->alist->nr_addrs) - ac->index = 0; + set = ac->alist->responded; + failed = ac->alist->failed; + _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index); - if (ac->index == ac->start) { - ac->error = -EDESTADDRREQ; - return false; - } - } + ac->nr_iterations++; + + set &= ~(failed | ac->tried); + + if (!set) + return false; - ac->begun = true; + index = READ_ONCE(ac->alist->preferred); + if (test_bit(index, &set)) + goto selected; + + index = __ffs(set); + +selected: + ac->index = index; + set_bit(index, &ac->tried); ac->responded = false; - ac->addr = &ac->alist->addrs[ac->index]; return true; } @@ -339,53 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac) alist = ac->alist; if (alist) { - if (ac->responded && ac->index != ac->start) - WRITE_ONCE(alist->index, ac->index); + if (ac->responded && + ac->index != alist->preferred && + test_bit(ac->alist->preferred, &ac->tried)) + WRITE_ONCE(alist->preferred, ac->index); afs_put_addrlist(alist); + ac->alist = NULL; } - ac->addr = NULL; - ac->alist = NULL; - ac->begun = false; return ac->error; } - -/* - * Set the address cursor for iterating over VL servers. - */ -int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell) -{ - struct afs_addr_list *alist; - int ret; - - if (!rcu_access_pointer(cell->vl_addrs)) { - ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, - TASK_INTERRUPTIBLE); - if (ret < 0) - return ret; - - if (!rcu_access_pointer(cell->vl_addrs) && - ktime_get_real_seconds() < cell->dns_expiry) - return cell->error; - } - - read_lock(&cell->vl_addrs_lock); - alist = rcu_dereference_protected(cell->vl_addrs, - lockdep_is_held(&cell->vl_addrs_lock)); - if (alist->nr_addrs > 0) - afs_get_addrlist(alist); - else - alist = NULL; - read_unlock(&cell->vl_addrs_lock); - - if (!alist) - return -EDESTADDRREQ; - - ac->alist = alist; - ac->addr = NULL; - ac->start = READ_ONCE(alist->index); - ac->index = ac->start; - ac->error = 0; - ac->begun = false; - return 0; -} diff --git a/fs/afs/afs.h b/fs/afs/afs.h @@ -23,9 +23,9 @@ #define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ #define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ -typedef unsigned afs_volid_t; -typedef unsigned afs_vnodeid_t; -typedef unsigned long long afs_dataversion_t; +typedef u64 afs_volid_t; +typedef u64 afs_vnodeid_t; +typedef u64 afs_dataversion_t; typedef enum { AFSVL_RWVOL, /* read/write volume */ @@ -52,8 +52,9 @@ typedef enum { */ struct afs_fid { afs_volid_t vid; /* volume ID */ - afs_vnodeid_t vnode; /* file index within volume */ - unsigned unique; /* unique ID number (file index version) */ + afs_vnodeid_t vnode; /* Lower 64-bits of file index within volume */ + u32 vnode_hi; /* Upper 32-bits of file index */ + u32 unique; /* unique ID number (file index version) */ }; /* @@ -67,14 +68,14 @@ typedef enum { } afs_callback_type_t; struct afs_callback { + time64_t expires_at; /* Time at which expires */ unsigned version; /* Callback version */ - unsigned expiry; /* Time at which expires */ afs_callback_type_t type; /* Type of callback */ }; struct afs_callback_break { struct afs_fid fid; /* File identifier */ - struct afs_callback cb; /* Callback details */ + //struct afs_callback cb; /* Callback details */ }; #define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ @@ -129,19 +130,18 @@ typedef u32 afs_access_t; struct afs_file_status { u64 size; /* file size */ afs_dataversion_t data_version; /* current data version */ - time_t mtime_client; /* last time client changed data */ - time_t mtime_server; /* last time server changed data */ - unsigned abort_code; /* Abort if bulk-fetching this failed */ - - afs_file_type_t type; /* file type */ - unsigned nlink; /* link count */ - u32 author; /* author ID */ - u32 owner; /* owner ID */ - u32 group; /* group ID */ + struct timespec64 mtime_client; /* Last time client changed data */ + struct timespec64 mtime_server; /* Last time server changed data */ + s64 author; /* author ID */ + s64 owner; /* owner ID */ + s64 group; /* group ID */ afs_access_t caller_access; /* access rights for authenticated caller */ afs_access_t anon_access; /* access rights for unauthenticated caller */ umode_t mode; /* UNIX mode */ + afs_file_type_t type; /* file type */ + u32 nlink; /* link count */ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ + u32 abort_code; /* Abort if bulk-fetching this failed */ }; /* @@ -158,25 +158,27 @@ struct afs_file_status { * AFS volume synchronisation information */ struct afs_volsync { - time_t creation; /* volume creation time */ + time64_t creation; /* volume creation time */ }; /* * AFS volume status record */ struct afs_volume_status { - u32 vid; /* volume ID */ - u32 parent_id; /* parent volume ID */ + afs_volid_t vid; /* volume ID */ + afs_volid_t parent_id; /* parent volume ID */ u8 online; /* true if volume currently online and available */ u8 in_service; /* true if volume currently in service */ u8 blessed; /* same as in_service */ u8 needs_salvage; /* true if consistency checking required */ u32 type; /* volume type (afs_voltype_t) */ - u32 min_quota; /* minimum space set aside (blocks) */ - u32 max_quota; /* maximum space this volume may occupy (blocks) */ - u32 blocks_in_use; /* space this volume currently occupies (blocks) */ - u32 part_blocks_avail; /* space available in volume's partition */ - u32 part_max_blocks; /* size of volume's partition */ + u64 min_quota; /* minimum space set aside (blocks) */ + u64 max_quota; /* maximum space this volume may occupy (blocks) */ + u64 blocks_in_use; /* space this volume currently occupies (blocks) */ + u64 part_blocks_avail; /* space available in volume's partition */ + u64 part_max_blocks; /* size of volume's partition */ + s64 vol_copy_date; + s64 vol_backup_date; }; #define AFS_BLOCK_SIZE 1024 diff --git a/fs/afs/cache.c b/fs/afs/cache.c @@ -49,7 +49,7 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, struct afs_vnode *vnode = cookie_netfs_data; struct afs_vnode_cache_aux aux; - _enter("{%x,%x,%llx},%p,%u", + _enter("{%llx,%x,%llx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, buflen); diff --git a/fs/afs/callback.c b/fs/afs/callback.c @@ -210,12 +210,10 @@ void afs_init_callback_state(struct afs_server *server) /* * actually break a callback */ -void afs_break_callback(struct afs_vnode *vnode) +void __afs_break_callback(struct afs_vnode *vnode) { _enter(""); - write_seqlock(&vnode->cb_lock); - clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; @@ -230,7 +228,12 @@ void afs_break_callback(struct afs_vnode *vnode) afs_lock_may_be_available(vnode); spin_unlock(&vnode->lock); } +} +void afs_break_callback(struct afs_vnode *vnode) +{ + write_seqlock(&vnode->cb_lock); + __afs_break_callback(vnode); write_sequnlock(&vnode->cb_lock); } @@ -310,14 +313,10 @@ void afs_break_callbacks(struct afs_server *server, size_t count, /* TODO: Sort the callback break list by volume ID */ for (; count > 0; callbacks++, count--) { - _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }", + _debug("- Fid { vl=%08llx n=%llu u=%u }", callbacks->fid.vid, callbacks->fid.vnode, - callbacks->fid.unique, - callbacks->cb.version, - callbacks->cb.expiry, - callbacks->cb.type - ); + callbacks->fid.unique); afs_break_one_callback(server, &callbacks->fid); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c @@ -20,6 +20,8 @@ #include "internal.h" static unsigned __read_mostly afs_cell_gc_delay = 10; +static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; +static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; static void afs_manage_cell(struct work_struct *); @@ -119,7 +121,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, */ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, - const char *vllist) + const char *addresses) { struct afs_cell *cell; int i, ret; @@ -134,7 +136,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, if (namelen == 5 && memcmp(name, "@cell", 5) == 0) return ERR_PTR(-EINVAL); - _enter("%*.*s,%s", namelen, namelen, name, vllist); + _enter("%*.*s,%s", namelen, namelen, name, addresses); cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); if (!cell) { @@ -153,23 +155,26 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, (1 << AFS_CELL_FL_NO_LOOKUP_YET)); INIT_LIST_HEAD(&cell->proc_volumes); rwlock_init(&cell->proc_lock); - rwlock_init(&cell->vl_addrs_lock); + rwlock_init(&cell->vl_servers_lock); /* Fill in the VL server list if we were given a list of addresses to * use. */ - if (vllist) { - struct afs_addr_list *alist; - - alist = afs_parse_text_addrs(vllist, strlen(vllist), ':', - VL_SERVICE, AFS_VL_PORT); - if (IS_ERR(alist)) { - ret = PTR_ERR(alist); + if (addresses) { + struct afs_vlserver_list *vllist; + + vllist = afs_parse_text_addrs(net, + addresses, strlen(addresses), ':', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); goto parse_failed; } - rcu_assign_pointer(cell->vl_addrs, alist); + rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = TIME64_MAX; + } else { + cell->dns_expiry = ktime_get_real_seconds(); } _leave(" = %p", cell); @@ -356,26 +361,40 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) */ static void afs_update_cell(struct afs_cell *cell) { - struct afs_addr_list *alist, *old; - time64_t now, expiry; + struct afs_vlserver_list *vllist, *old; + unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl); + unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl); + time64_t now, expiry = 0; _enter("%s", cell->name); - alist = afs_dns_query(cell, &expiry); - if (IS_ERR(alist)) { - switch (PTR_ERR(alist)) { + vllist = afs_dns_query(cell, &expiry); + + now = ktime_get_real_seconds(); + if (min_ttl > max_ttl) + max_ttl = min_ttl; + if (expiry < now + min_ttl) + expiry = now + min_ttl; + else if (expiry > now + max_ttl) + expiry = now + max_ttl; + + if (IS_ERR(vllist)) { + switch (PTR_ERR(vllist)) { case -ENODATA: - /* The DNS said that the cell does not exist */ + case -EDESTADDRREQ: + /* The DNS said that the cell does not exist or there + * weren't any addresses to be had. + */ set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - cell->dns_expiry = ktime_get_real_seconds() + 61; + cell->dns_expiry = expiry; break; case -EAGAIN: case -ECONNREFUSED: default: set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - cell->dns_expiry = ktime_get_real_seconds() + 10; + cell->dns_expiry = now + 10; break; } @@ -387,12 +406,12 @@ static void afs_update_cell(struct afs_cell *cell) /* Exclusion on changing vl_addrs is achieved by a * non-reentrant work item. */ - old = rcu_dereference_protected(cell->vl_addrs, true); - rcu_assign_pointer(cell->vl_addrs, alist); + old = rcu_dereference_protected(cell->vl_servers, true); + rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = expiry; if (old) - afs_put_addrlist(old); + afs_put_vlserverlist(cell->net, old); } if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) @@ -414,7 +433,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); - afs_put_addrlist(rcu_access_pointer(cell->vl_addrs)); + afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c @@ -16,6 +16,7 @@ #include <linux/ip.h> #include "internal.h" #include "afs_cm.h" +#include "protocol_yfs.h" static int afs_deliver_cb_init_call_back_state(struct afs_call *); static int afs_deliver_cb_init_call_back_state3(struct afs_call *); @@ -30,6 +31,8 @@ static void SRXAFSCB_Probe(struct work_struct *); static void SRXAFSCB_ProbeUuid(struct work_struct *); static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); +static int afs_deliver_yfs_cb_callback(struct afs_call *); + #define CM_NAME(name) \ const char afs_SRXCB##name##_name[] __tracepoint_string = \ "CB." #name @@ -101,12 +104,25 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { }; /* + * YFS CB.CallBack operation type + */ +static CM_NAME(YFS_CallBack); +static const struct afs_call_type afs_SRXYFSCB_CallBack = { + .name = afs_SRXCBYFS_CallBack_name, + .deliver = afs_deliver_yfs_cb_callback, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_CallBack, +}; + +/* * route an incoming cache manager call * - return T if supported, F if not */ bool afs_cm_incoming_call(struct afs_call *call) { - _enter("{CB.OP %u}", call->operation_ID); + _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); + + call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall); switch (call->operation_ID) { case CBCallBack: @@ -127,12 +143,102 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true; + case YFSCBCallBack: + if (call->service_id != YFS_CM_SERVICE) + return false; + call->type = &afs_SRXYFSCB_CallBack; + return true; default: return false; } } /* + * Record a probe to the cache manager from a server. + */ +static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server) +{ + _enter(""); + + if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) && + !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) { + if (server->cm_epoch == call->epoch) + return 0; + + if (!server->probe.said_rebooted) { + pr_notice("kAFS: FS rebooted %pU\n", &server->uuid); + server->probe.said_rebooted = true; + } + } + + spin_lock(&server->probe_lock); + + if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) { + server->cm_epoch = call->epoch; + server->probe.cm_epoch = call->epoch; + goto out; + } + + if (server->probe.cm_probed && + call->epoch != server->probe.cm_epoch && + !server->probe.said_inconsistent) { + pr_notice("kAFS: FS endpoints inconsistent %pU\n", + &server->uuid); + server->probe.said_inconsistent = true; + } + + if (!server->probe.cm_probed || call->epoch == server->cm_epoch) + server->probe.cm_epoch = server->cm_epoch; + +out: + server->probe.cm_probed = true; + spin_unlock(&server->probe_lock); + return 0; +} + +/* + * Find the server record by peer address and record a probe to the cache + * manager from a server. + */ +static int afs_find_cm_server_by_peer(struct afs_call *call) +{ + struct sockaddr_rxrpc srx; + struct afs_server *server; + + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + + server = afs_find_server(call->net, &srx); + if (!server) { + trace_afs_cm_no_server(call, &srx); + return 0; + } + + call->cm_server = server; + return afs_record_cm_probe(call, server); +} + +/* + * Find the server record by server UUID and record a probe to the cache + * manager from a server. + */ +static int afs_find_cm_server_by_uuid(struct afs_call *call, + struct afs_uuid *uuid) +{ + struct afs_server *server; + + rcu_read_lock(); + server = afs_find_server_by_uuid(call->net, call->request); + rcu_read_unlock(); + if (!server) { + trace_afs_cm_no_server_u(call, call->request); + return 0; + } + + call->cm_server = server; + return afs_record_cm_probe(call, server); +} + +/* * Clean up a cache manager call. */ static void afs_cm_destructor(struct afs_call *call) @@ -168,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work) static int afs_deliver_cb_callback(struct afs_call *call) { struct afs_callback_break *cb; - struct sockaddr_rxrpc srx; __be32 *bp; int ret, loop; @@ -176,32 +281,32 @@ static int afs_deliver_cb_callback(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* extract the FID array and its count in two steps */ case 1: _debug("extract FID count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > AFSCBMAX) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_cb_fid_count); call->buffer = kmalloc(array3_size(call->count, 3, 4), GFP_KERNEL); if (!call->buffer) return -ENOMEM; - call->offset = 0; + afs_extract_to_buf(call, call->count * 3 * 4); call->unmarshall++; case 2: _debug("extract FID array"); - ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -218,59 +323,46 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb->fid.vid = ntohl(*bp++); cb->fid.vnode = ntohl(*bp++); cb->fid.unique = ntohl(*bp++); - cb->cb.type = AFSCM_CB_UNTYPED; } - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* extract the callback array and its count in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->count2 = ntohl(call->tmp); _debug("CB count: %u", call->count2); if (call->count2 != call->count && call->count2 != 0) - return afs_protocol_error(call, -EBADMSG); - call->offset = 0; + return afs_protocol_error(call, -EBADMSG, + afs_eproto_cb_count); + call->_iter = &call->iter; + iov_iter_discard(&call->iter, READ, call->count2 * 3 * 4); call->unmarshall++; case 4: - _debug("extract CB array"); - ret = afs_extract_data(call, call->buffer, - call->count2 * 3 * 4, false); + _debug("extract discard %zu/%u", + iov_iter_count(&call->iter), call->count2 * 3 * 4); + + ret = afs_extract_data(call, false); if (ret < 0) return ret; - _debug("unmarshall CB array"); - cb = call->request; - bp = call->buffer; - for (loop = call->count2; loop > 0; loop--, cb++) { - cb->cb.version = ntohl(*bp++); - cb->cb.expiry = ntohl(*bp++); - cb->cb.type = ntohl(*bp++); - } - - call->offset = 0; call->unmarshall++; case 5: break; } if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - call->cm_server = afs_find_server(call->net, &srx); - if (!call->cm_server) - trace_afs_cm_no_server(call, &srx); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } /* @@ -294,24 +386,18 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) */ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { - struct sockaddr_rxrpc srx; int ret; _enter(""); - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - - ret = afs_extract_data(call, NULL, 0, false); + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); if (ret < 0) return ret; /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - call->cm_server = afs_find_server(call->net, &srx); - if (!call->cm_server) - trace_afs_cm_no_server(call, &srx); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } /* @@ -330,16 +416,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL); if (!call->buffer) return -ENOMEM; + afs_extract_to_buf(call, 11 * sizeof(__be32)); call->unmarshall++; case 1: _debug("extract UUID"); - ret = afs_extract_data(call, call->buffer, - 11 * sizeof(__be32), false); + ret = afs_extract_data(call, false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -362,7 +447,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) for (loop = 0; loop < 6; loop++) r->node[loop] = ntohl(b[loop + 5]); - call->offset = 0; call->unmarshall++; case 2: @@ -370,17 +454,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) } if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rcu_read_lock(); - call->cm_server = afs_find_server_by_uuid(call->net, call->request); - rcu_read_unlock(); - if (!call->cm_server) - trace_afs_cm_no_server_u(call, call->request); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_uuid(call, call->request); } /* @@ -405,14 +483,14 @@ static int afs_deliver_cb_probe(struct afs_call *call) _enter(""); - ret = afs_extract_data(call, NULL, 0, false); + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); if (ret < 0) return ret; if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; - - return afs_queue_call_work(call); + return afs_io_error(call, afs_io_error_cm_reply); + return afs_find_cm_server_by_peer(call); } /* @@ -453,16 +531,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL); if (!call->buffer) return -ENOMEM; + afs_extract_to_buf(call, 11 * sizeof(__be32)); call->unmarshall++; case 1: _debug("extract UUID"); - ret = afs_extract_data(call, call->buffer, - 11 * sizeof(__be32), false); + ret = afs_extract_data(call, false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -485,7 +562,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) for (loop = 0; loop < 6; loop++) r->node[loop] = ntohl(b[loop + 5]); - call->offset = 0; call->unmarshall++; case 2: @@ -493,9 +569,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) } if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; - - return afs_queue_call_work(call); + return afs_io_error(call, afs_io_error_cm_reply); + return afs_find_cm_server_by_uuid(call, call->request); } /* @@ -570,12 +645,88 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) _enter(""); - ret = afs_extract_data(call, NULL, 0, false); + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); if (ret < 0) return ret; if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); + return afs_find_cm_server_by_peer(call); +} + +/* + * deliver request data to a YFS CB.CallBack call + */ +static int afs_deliver_yfs_cb_callback(struct afs_call *call) +{ + struct afs_callback_break *cb; + struct yfs_xdr_YFSFid *bp; + size_t size; + int ret, loop; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the FID array and its count in two steps */ + case 1: + _debug("extract FID count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("FID count: %u", call->count); + if (call->count > YFSCBMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_cb_fid_count); + + size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid)); + call->buffer = kmalloc(size, GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + afs_extract_to_buf(call, size); + call->unmarshall++; + + case 2: + _debug("extract FID array"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + _debug("unmarshall FID array"); + call->request = kcalloc(call->count, + sizeof(struct afs_callback_break), + GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + cb = call->request; + bp = call->buffer; + for (loop = call->count; loop > 0; loop--, cb++) { + cb->fid.vid = xdr_to_u64(bp->volume); + cb->fid.vnode = xdr_to_u64(bp->vnode.lo); + cb->fid.vnode_hi = ntohl(bp->vnode.hi); + cb->fid.unique = ntohl(bp->vnode.unique); + bp++; + } + + afs_extract_to_tmp(call); + call->unmarshall++; + + case 3: + break; + } + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); - return afs_queue_call_work(call); + /* We'll need the file server record as that tells us which set of + * vnodes to operate upon. + */ + return afs_find_cm_server_by_peer(call); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c @@ -138,6 +138,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, ntohs(dbuf->blocks[tmp].hdr.magic)); trace_afs_dir_check_failed(dvnode, off, i_size); kunmap(page); + trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic); goto error; } @@ -190,9 +191,11 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) retry: i_size = i_size_read(&dvnode->vfs_inode); if (i_size < 2048) - return ERR_PTR(-EIO); - if (i_size > 2048 * 1024) + return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small)); + if (i_size > 2048 * 1024) { + trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); return ERR_PTR(-EFBIG); + } _enter("%llu", i_size); @@ -315,7 +318,8 @@ content_has_grown: /* * deal with one block in an AFS directory */ -static int afs_dir_iterate_block(struct dir_context *ctx, +static int afs_dir_iterate_block(struct afs_vnode *dvnode, + struct dir_context *ctx, union afs_xdr_dir_block *block, unsigned blkoff) { @@ -365,7 +369,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx, " (len %u/%zu)", blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); - return -EIO; + return afs_bad(dvnode, afs_file_error_dir_over_end); } if (!(block->hdr.bitmap[next / 8] & (1 << (next % 8)))) { @@ -373,7 +377,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx, " %u unmarked extension (len %u/%zu)", blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); - return -EIO; + return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } _debug("ENT[%zu.%u]: ext %u/%zu", @@ -442,7 +446,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, */ page = req->pages[blkoff / PAGE_SIZE]; if (!page) { - ret = -EIO; + ret = afs_bad(dvnode, afs_file_error_dir_missing_page); break; } mark_page_accessed(page); @@ -455,7 +459,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, do { dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / sizeof(union afs_xdr_dir_block)]; - ret = afs_dir_iterate_block(ctx, dblock, blkoff); + ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff); if (ret != 1) { kunmap(page); goto out; @@ -548,7 +552,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, } *fid = cookie.fid; - _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique); + _leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique); return 0; } @@ -826,7 +830,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, struct key *key; int ret; - _enter("{%x:%u},%p{%pd},", + _enter("{%llx:%llu},%p{%pd},", dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry); ASSERTCMP(d_inode(dentry), ==, NULL); @@ -896,7 +900,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); - _enter("{v={%x:%u} n=%pd fl=%lx},", + _enter("{v={%llx:%llu} n=%pd fl=%lx},", vnode->fid.vid, vnode->fid.vnode, dentry, vnode->flags); } else { @@ -965,7 +969,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) /* if the vnode ID has changed, then the dirent points to a * different file */ if (fid.vnode != vnode->fid.vnode) { - _debug("%pd: dirent changed [%u != %u]", + _debug("%pd: dirent changed [%llu != %llu]", dentry, fid.vnode, vnode->fid.vnode); goto not_found; @@ -1085,6 +1089,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, vnode = AFS_FS_I(inode); set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + afs_vnode_commit_status(fc, vnode, 0); d_add(new_dentry, inode); } @@ -1104,7 +1109,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) mode |= S_IFDIR; - _enter("{%x:%u},{%pd},%ho", + _enter("{%llx:%llu},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); key = afs_request_key(dvnode->volume->cell); @@ -1169,12 +1174,12 @@ static void afs_dir_remove_subdir(struct dentry *dentry) static int afs_rmdir(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; - struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; u64 data_version = dvnode->status.data_version; int ret; - _enter("{%x:%u},{%pd}", + _enter("{%llx:%llu},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); key = afs_request_key(dvnode->volume->cell); @@ -1183,11 +1188,19 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) goto error; } + /* Try to make sure we have a callback promise on the victim. */ + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_key; + } + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_remove(&fc, dentry->d_name.name, true, + afs_fs_remove(&fc, vnode, dentry->d_name.name, true, data_version); } @@ -1201,6 +1214,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) } } +error_key: key_put(key); error: return ret; @@ -1231,7 +1245,9 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key, if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - if (dir_valid) { + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + /* Already done */ + } else if (dir_valid) { drop_nlink(&vnode->vfs_inode); if (vnode->vfs_inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -1260,13 +1276,13 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key, static int afs_unlink(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; - struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; unsigned long d_version = (unsigned long)dentry->d_fsdata; u64 data_version = dvnode->status.data_version; int ret; - _enter("{%x:%u},{%pd}", + _enter("{%llx:%llu},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); if (dentry->d_name.len >= AFSNAMEMAX) @@ -1290,7 +1306,18 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_remove(&fc, dentry->d_name.name, false, + + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && + !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { + yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, + data_version); + if (fc.ac.error != -ECONNABORTED || + fc.ac.abort_code != RXGEN_OPCODE) + continue; + set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); + } + + afs_fs_remove(&fc, vnode, dentry->d_name.name, false, data_version); } @@ -1330,7 +1357,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, mode |= S_IFREG; - _enter("{%x:%u},{%pd},%ho,", + _enter("{%llx:%llu},{%pd},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); ret = -ENAMETOOLONG; @@ -1393,7 +1420,7 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode = AFS_FS_I(dir); data_version = dvnode->status.data_version; - _enter("{%x:%u},{%x:%u},{%pd}", + _enter("{%llx:%llu},{%llx:%llu},{%pd}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, dentry); @@ -1464,7 +1491,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, u64 data_version = dvnode->status.data_version; int ret; - _enter("{%x:%u},{%pd},%s", + _enter("{%llx:%llu},{%pd},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry, content); @@ -1540,7 +1567,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, orig_data_version = orig_dvnode->status.data_version; new_data_version = new_dvnode->status.data_version; - _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}", + _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, vnode->fid.vid, vnode->fid.vnode, new_dvnode->fid.vid, new_dvnode->fid.vnode, @@ -1607,7 +1634,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) { struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); - _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); set_page_private(page, 0); ClearPagePrivate(page); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "", NULL, NULL); + ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; @@ -62,7 +62,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) struct inode *inode; int ret = -ENOENT; - _enter("%p{%pd}, {%x:%u}", + _enter("%p{%pd}, {%llx:%llu}", dentry, dentry, vnode->fid.vid, vnode->fid.vnode); if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) diff --git a/fs/afs/file.c b/fs/afs/file.c @@ -121,7 +121,7 @@ int afs_open(struct inode *inode, struct file *file) struct key *key; int ret; - _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { @@ -170,7 +170,7 @@ int afs_release(struct inode *inode, struct file *file) struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = file->private_data; - _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); if ((file->f_mode & FMODE_WRITE)) return vfs_fsync(file, 0); @@ -228,7 +228,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x,,,", + _enter("%s{%llx:%llu.%u},%x,,,", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -634,7 +634,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); unsigned long priv; - _enter("{{%x:%u}[%lu],%lx},%x", + _enter("{{%llx:%llu}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, gfp_flags); diff --git a/fs/afs/flock.c b/fs/afs/flock.c @@ -29,7 +29,7 @@ static const struct file_lock_operations afs_lock_ops = { */ void afs_lock_may_be_available(struct afs_vnode *vnode) { - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); } @@ -76,7 +76,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x,%u", + _enter("%s{%llx:%llu.%u},%x,%u", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -107,7 +107,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x", + _enter("%s{%llx:%llu.%u},%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -138,7 +138,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x", + _enter("%s{%llx:%llu.%u},%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -175,7 +175,7 @@ void afs_lock_work(struct work_struct *work) struct key *key; int ret; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); spin_lock(&vnode->lock); @@ -192,7 +192,7 @@ again: ret = afs_release_lock(vnode, vnode->lock_key); if (ret < 0) printk(KERN_WARNING "AFS:" - " Failed to release lock on {%x:%x} error %d\n", + " Failed to release lock on {%llx:%llx} error %d\n", vnode->fid.vid, vnode->fid.vnode, ret); spin_lock(&vnode->lock); @@ -229,7 +229,7 @@ again: key_put(key); if (ret < 0) - pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n", + pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n", vnode->fid.vid, vnode->fid.vnode, ret); spin_lock(&vnode->lock); @@ -430,7 +430,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) struct key *key = afs_file_key(file); int ret; - _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); + _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); /* only whole-file locks are supported */ if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) @@ -582,7 +582,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl) struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); int ret; - _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); + _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); /* Flush all pending writes before doing anything with locks. */ vfs_fsync(file, 0); @@ -639,7 +639,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); - _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", + _enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}", vnode->fid.vid, vnode->fid.vnode, cmd, fl->fl_type, fl->fl_flags, (long long) fl->fl_start, (long long) fl->fl_end); @@ -662,7 +662,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); - _enter("{%x:%u},%d,{t=%x,fl=%x}", + _enter("{%llx:%llu},%d,{t=%x,fl=%x}", vnode->fid.vid, vnode->fid.vnode, cmd, fl->fl_type, fl->fl_flags); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c @@ -0,0 +1,270 @@ +/* AFS fileserver probing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + +static bool afs_fs_probe_done(struct afs_server *server) +{ + if (!atomic_dec_and_test(&server->probe_outstanding)) + return false; + + wake_up_var(&server->probe_outstanding); + clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags); + wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING); + return true; +} + +/* + * Process the result of probing a fileserver. This is called after successful + * or failed delivery of an FS.GetCapabilities operation. + */ +void afs_fileserver_probe_result(struct afs_call *call) +{ + struct afs_addr_list *alist = call->alist; + struct afs_server *server = call->reply[0]; + unsigned int server_index = (long)call->reply[1]; + unsigned int index = call->addr_ix; + unsigned int rtt = UINT_MAX; + bool have_result = false; + u64 _rtt; + int ret = call->error; + + _enter("%pU,%u", &server->uuid, index); + + spin_lock(&server->probe_lock); + + switch (ret) { + case 0: + server->probe.error = 0; + goto responded; + case -ECONNABORTED: + if (!server->probe.responded) { + server->probe.abort_code = call->abort_code; + server->probe.error = ret; + } + goto responded; + case -ENOMEM: + case -ENONET: + server->probe.local_failure = true; + afs_io_error(call, afs_io_error_fs_probe_fail); + goto out; + case -ECONNRESET: /* Responded, but call expired. */ + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + default: + clear_bit(index, &alist->responded); + set_bit(index, &alist->failed); + if (!server->probe.responded && + (server->probe.error == 0 || + server->probe.error == -ETIMEDOUT || + server->probe.error == -ETIME)) + server->probe.error = ret; + afs_io_error(call, afs_io_error_fs_probe_fail); + goto out; + } + +responded: + set_bit(index, &alist->responded); + clear_bit(index, &alist->failed); + + if (call->service_id == YFS_FS_SERVICE) { + server->probe.is_yfs = true; + set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } else { + server->probe.not_yfs = true; + if (!server->probe.is_yfs) { + clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } + } + + /* Get the RTT and scale it to fit into a 32-bit value that represents + * over a minute of time so that we can access it with one instruction + * on a 32-bit system. + */ + _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); + _rtt /= 64; + rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; + if (rtt < server->probe.rtt) { + server->probe.rtt = rtt; + alist->preferred = index; + have_result = true; + } + + smp_wmb(); /* Set rtt before responded. */ + server->probe.responded = true; + set_bit(AFS_SERVER_FL_PROBED, &server->flags); +out: + spin_unlock(&server->probe_lock); + + _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", + server_index, index, &alist->addrs[index].transport, + (unsigned int)rtt, ret); + + have_result |= afs_fs_probe_done(server); + if (have_result) { + server->probe.have_result = true; + wake_up_var(&server->probe.have_result); + wake_up_all(&server->probe_wq); + } +} + +/* + * Probe all of a fileserver's addresses to find out the best route and to + * query its capabilities. + */ +static int afs_do_probe_fileserver(struct afs_net *net, + struct afs_server *server, + struct key *key, + unsigned int server_index) +{ + struct afs_addr_cursor ac = { + .index = 0, + }; + int ret; + + _enter("%pU", &server->uuid); + + read_lock(&server->fs_lock); + ac.alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + read_unlock(&server->fs_lock); + + atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + memset(&server->probe, 0, sizeof(server->probe)); + server->probe.rtt = UINT_MAX; + + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { + ret = afs_fs_get_capabilities(net, server, &ac, key, server_index, + true); + if (ret != -EINPROGRESS) { + afs_fs_probe_done(server); + return ret; + } + } + + return 0; +} + +/* + * Send off probes to all unprobed servers. + */ +int afs_probe_fileservers(struct afs_net *net, struct key *key, + struct afs_server_list *list) +{ + struct afs_server *server; + int i, ret; + + for (i = 0; i < list->nr_servers; i++) { + server = list->servers[i].server; + if (test_bit(AFS_SERVER_FL_PROBED, &server->flags)) + continue; + + if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) { + ret = afs_do_probe_fileserver(net, server, key, i); + if (ret) + return ret; + } + } + + return 0; +} + +/* + * Wait for the first as-yet untried fileserver to respond. + */ +int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) +{ + struct wait_queue_entry *waits; + struct afs_server *server; + unsigned int rtt = UINT_MAX; + bool have_responders = false; + int pref = -1, i; + + _enter("%u,%lx", slist->nr_servers, untried); + + /* Only wait for servers that have a probe outstanding. */ + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags)) + __clear_bit(i, &untried); + if (server->probe.responded) + have_responders = true; + } + } + if (have_responders || !untried) + return 0; + + waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); + if (!waits) + return -ENOMEM; + + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + init_waitqueue_entry(&waits[i], current); + add_wait_queue(&server->probe_wq, &waits[i]); + } + } + + for (;;) { + bool still_probing = false; + + set_current_state(TASK_INTERRUPTIBLE); + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (server->probe.responded) + goto stop; + if (test_bit(AFS_SERVER_FL_PROBING, &server->flags)) + still_probing = true; + } + } + + if (!still_probing || unlikely(signal_pending(current))) + goto stop; + schedule(); + } + +stop: + set_current_state(TASK_RUNNING); + + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (server->probe.responded && + server->probe.rtt < rtt) { + pref = i; + rtt = server->probe.rtt; + } + + remove_wait_queue(&server->probe_wq, &waits[i]); + } + } + + kfree(waits); + + if (pref == -1 && signal_pending(current)) + return -ERESTARTSYS; + + if (pref >= 0) + slist->preferred = pref; + return 0; +} diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c @@ -17,15 +17,10 @@ #include "internal.h" #include "afs_fs.h" #include "xdr_fs.h" +#include "protocol_yfs.h" static const struct afs_fid afs_zero_fid; -/* - * We need somewhere to discard into in case the server helpfully returns more - * than we asked for in FS.FetchData{,64}. - */ -static u8 afs_discard_buffer[64]; - static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) { call->cbi = afs_get_cb_interest(cbi); @@ -75,8 +70,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode, struct timespec64 t; umode_t mode; - t.tv_sec = status->mtime_client; - t.tv_nsec = 0; + t = status->mtime_client; vnode->vfs_inode.i_ctime = t; vnode->vfs_inode.i_mtime = t; vnode->vfs_inode.i_atime = t; @@ -96,7 +90,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode, if (!(flags & AFS_VNODE_NOT_YET_SET)) { if (expected_version && *expected_version != status->data_version) { - _debug("vnode modified %llx on {%x:%u} [exp %llx]", + _debug("vnode modified %llx on {%llx:%llu} [exp %llx]", (unsigned long long) status->data_version, vnode->fid.vid, vnode->fid.vnode, (unsigned long long) *expected_version); @@ -170,7 +164,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (type != status->type && vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - pr_warning("Vnode %x:%x:%x changed type %u to %u\n", + pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, @@ -200,8 +194,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, EXTRACT_M(mode); EXTRACT_M(group); - status->mtime_client = ntohl(xdr->mtime_client); - status->mtime_server = ntohl(xdr->mtime_server); + status->mtime_client.tv_sec = ntohl(xdr->mtime_client); + status->mtime_client.tv_nsec = 0; + status->mtime_server.tv_sec = ntohl(xdr->mtime_server); + status->mtime_server.tv_nsec = 0; status->lock_count = ntohl(xdr->lock_count); size = (u64)ntohl(xdr->size_lo); @@ -233,7 +229,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); } /* @@ -273,7 +269,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, write_seqlock(&vnode->cb_lock); - if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { + if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); @@ -293,13 +289,19 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, *_bp = bp; } -static void xdr_decode_AFSCallBack_raw(const __be32 **_bp, +static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry) +{ + return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC); +} + +static void xdr_decode_AFSCallBack_raw(struct afs_call *call, + const __be32 **_bp, struct afs_callback *cb) { const __be32 *bp = *_bp; cb->version = ntohl(*bp++); - cb->expiry = ntohl(*bp++); + cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++)); cb->type = ntohl(*bp++); *_bp = bp; } @@ -311,14 +313,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp, struct afs_volsync *volsync) { const __be32 *bp = *_bp; + u32 creation; - volsync->creation = ntohl(*bp++); + creation = ntohl(*bp++); bp++; /* spare2 */ bp++; /* spare3 */ bp++; /* spare4 */ bp++; /* spare5 */ bp++; /* spare6 */ *_bp = bp; + + if (volsync) + volsync->creation = creation; } /* @@ -379,6 +385,8 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, vs->blocks_in_use = ntohl(*bp++); vs->part_blocks_avail = ntohl(*bp++); vs->part_max_blocks = ntohl(*bp++); + vs->vol_copy_date = 0; + vs->vol_backup_date = 0; *_bp = bp; } @@ -395,16 +403,16 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) if (ret < 0) return ret; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; xdr_decode_AFSCallBack(call, vnode, &bp); - if (call->reply[1]) - xdr_decode_AFSVolSync(&bp, call->reply[1]); + xdr_decode_AFSVolSync(&bp, call->reply[1]); _leave(" = 0 [done]"); return 0; @@ -431,7 +439,10 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_fetch_file_status(fc, volsync, new_inode); + + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode, @@ -445,6 +456,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy call->reply[0] = vnode; call->reply[1] = volsync; call->expected_version = new_inode ? 1 : vnode->status.data_version; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -468,139 +480,117 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) struct afs_read *req = call->reply[2]; const __be32 *bp; unsigned int size; - void *buffer; int ret; - _enter("{%u,%zu/%u;%llu/%llu}", - call->unmarshall, call->offset, call->count, - req->remain, req->actual_len); + _enter("{%u,%zu/%llu}", + call->unmarshall, iov_iter_count(&call->iter), req->actual_len); switch (call->unmarshall) { case 0: req->actual_len = 0; - call->offset = 0; + req->index = 0; + req->offset = req->pos & (PAGE_SIZE - 1); call->unmarshall++; - if (call->operation_ID != FSFETCHDATA64) { - call->unmarshall++; - goto no_msw; + if (call->operation_ID == FSFETCHDATA64) { + afs_extract_to_tmp64(call); + } else { + call->tmp_u = htonl(0); + afs_extract_to_tmp(call); } - /* extract the upper part of the returned data length of an - * FSFETCHDATA64 op (which should always be 0 using this - * client) */ - case 1: - _debug("extract data length (MSW)"); - ret = afs_extract_data(call, &call->tmp, 4, true); - if (ret < 0) - return ret; - - req->actual_len = ntohl(call->tmp); - req->actual_len <<= 32; - call->offset = 0; - call->unmarshall++; - - no_msw: /* extract the returned data length */ - case 2: + case 1: _debug("extract data length"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; - req->actual_len |= ntohl(call->tmp); + req->actual_len = be64_to_cpu(call->tmp64); _debug("DATA length: %llu", req->actual_len); - - req->remain = req->actual_len; - call->offset = req->pos & (PAGE_SIZE - 1); - req->index = 0; - if (req->actual_len == 0) + req->remain = min(req->len, req->actual_len); + if (req->remain == 0) goto no_more_data; + call->unmarshall++; begin_page: ASSERTCMP(req->index, <, req->nr_pages); - if (req->remain > PAGE_SIZE - call->offset) - size = PAGE_SIZE - call->offset; + if (req->remain > PAGE_SIZE - req->offset) + size = PAGE_SIZE - req->offset; else size = req->remain; - call->count = call->offset + size; - ASSERTCMP(call->count, <=, PAGE_SIZE); - req->remain -= size; + call->bvec[0].bv_len = size; + call->bvec[0].bv_offset = req->offset; + call->bvec[0].bv_page = req->pages[req->index]; + iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); + ASSERTCMP(size, <=, PAGE_SIZE); /* extract the returned data */ - case 3: - _debug("extract data %llu/%llu %zu/%u", - req->remain, req->actual_len, call->offset, call->count); + case 2: + _debug("extract data %zu/%llu", + iov_iter_count(&call->iter), req->remain); - buffer = kmap(req->pages[req->index]); - ret = afs_extract_data(call, buffer, call->count, true); - kunmap(req->pages[req->index]); + ret = afs_extract_data(call, true); if (ret < 0) return ret; - if (call->offset == PAGE_SIZE) { + req->remain -= call->bvec[0].bv_len; + req->offset += call->bvec[0].bv_len; + ASSERTCMP(req->offset, <=, PAGE_SIZE); + if (req->offset == PAGE_SIZE) { + req->offset = 0; if (req->page_done) req->page_done(call, req); req->index++; - if (req->remain > 0) { - call->offset = 0; - if (req->index >= req->nr_pages) { - call->unmarshall = 4; - goto begin_discard; - } + if (req->remain > 0) goto begin_page; - } } - goto no_more_data; + + ASSERTCMP(req->remain, ==, 0); + if (req->actual_len <= req->len) + goto no_more_data; /* Discard any excess data the server gave us */ - begin_discard: - case 4: - size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain); - call->count = size; - _debug("extract discard %llu/%llu %zu/%u", - req->remain, req->actual_len, call->offset, call->count); - - call->offset = 0; - ret = afs_extract_data(call, afs_discard_buffer, call->count, true); - req->remain -= call->offset; + iov_iter_discard(&call->iter, READ, req->actual_len - req->len); + call->unmarshall = 3; + case 3: + _debug("extract discard %zu/%llu", + iov_iter_count(&call->iter), req->actual_len - req->len); + + ret = afs_extract_data(call, true); if (ret < 0) return ret; - if (req->remain > 0) - goto begin_discard; no_more_data: - call->offset = 0; - call->unmarshall = 5; + call->unmarshall = 4; + afs_extract_to_buf(call, (21 + 3 + 6) * 4); /* extract the metadata */ - case 5: - ret = afs_extract_data(call, call->buffer, - (21 + 3 + 6) * 4, false); + case 4: + ret = afs_extract_data(call, false); if (ret < 0) return ret; bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &vnode->status.data_version, req) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req); + if (ret < 0) + return ret; xdr_decode_AFSCallBack(call, vnode, &bp); - if (call->reply[1]) - xdr_decode_AFSVolSync(&bp, call->reply[1]); + xdr_decode_AFSVolSync(&bp, call->reply[1]); - call->offset = 0; call->unmarshall++; - case 6: + case 5: break; } for (; req->index < req->nr_pages; req->index++) { - if (call->count < PAGE_SIZE) + if (req->offset < PAGE_SIZE) zero_user_segment(req->pages[req->index], - call->count, PAGE_SIZE); + req->offset, PAGE_SIZE); if (req->page_done) req->page_done(call, req); - call->count = 0; + req->offset = 0; } _leave(" = 0 [done]"); @@ -653,6 +643,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[1] = NULL; /* volsync */ call->reply[2] = req; call->expected_version = vnode->status.data_version; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -682,6 +673,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_fetch_data(fc, req); + if (upper_32_bits(req->pos) || upper_32_bits(req->len) || upper_32_bits(req->pos + req->len)) @@ -698,6 +692,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[1] = NULL; /* volsync */ call->reply[2] = req; call->expected_version = vnode->status.data_version; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -733,11 +728,14 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || - afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); - xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); + ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -778,6 +776,15 @@ int afs_fs_create(struct afs_fs_cursor *fc, size_t namesz, reqsz, padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){ + if (S_ISDIR(mode)) + return yfs_fs_make_dir(fc, name, mode, current_data_version, + newfid, newstatus, newcb); + else + return yfs_fs_create_file(fc, name, mode, current_data_version, + newfid, newstatus, newcb); + } + _enter(""); namesz = strlen(name); @@ -796,6 +803,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, call->reply[2] = newstatus; call->reply[3] = newcb; call->expected_version = current_data_version + 1; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -839,9 +847,10 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -868,15 +877,18 @@ static const struct afs_call_type afs_RXFSRemoveDir = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, - u64 current_data_version) +int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, bool isdir, u64 current_data_version) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_remove(fc, vnode, name, isdir, current_data_version); + _enter(""); namesz = strlen(name); @@ -890,15 +902,16 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; + call->reply[0] = dvnode; + call->reply[1] = vnode; call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(dvnode->fid.vid); + *bp++ = htonl(dvnode->fid.vnode); + *bp++ = htonl(dvnode->fid.unique); *bp++ = htonl(namesz); memcpy(bp, name, namesz); bp = (void *) bp + namesz; @@ -908,7 +921,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, } afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); + trace_afs_make_fs_call(call, &dvnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -929,10 +942,13 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || - afs_decode_status(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + if (ret < 0) + return ret; + ret = afs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -961,6 +977,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, size_t namesz, reqsz, padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_link(fc, vnode, name, current_data_version); + _enter(""); namesz = strlen(name); @@ -1016,10 +1035,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) || - afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1052,6 +1074,10 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, size_t namesz, reqsz, padsz, c_namesz, c_padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_symlink(fc, name, contents, current_data_version, + newfid, newstatus); + _enter(""); namesz = strlen(name); @@ -1122,13 +1148,16 @@ static int afs_deliver_fs_rename(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); - if (new_dvnode != orig_dvnode && - afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, - &call->expected_version_2, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + if (new_dvnode != orig_dvnode) { + ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL); + if (ret < 0) + return ret; + } /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1161,6 +1190,12 @@ int afs_fs_rename(struct afs_fs_cursor *fc, size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_rename(fc, orig_name, + new_dvnode, new_name, + current_orig_data_version, + current_new_data_version); + _enter(""); o_namesz = strlen(orig_name); @@ -1231,9 +1266,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ afs_pages_written_back(vnode, call); @@ -1273,7 +1309,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, @@ -1330,7 +1366,10 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, loff_t size, pos, i_size; __be32 *bp; - _enter(",%x,{%x:%u},,", + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_store_data(fc, mapping, first, last, offset, to); + + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); size = (loff_t)to - (loff_t)offset; @@ -1407,9 +1446,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1451,7 +1491,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); @@ -1498,7 +1538,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); @@ -1544,10 +1584,13 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_setattr(fc, attr); + if (attr->ia_valid & ATTR_SIZE) return afs_fs_setattr_size(fc, attr); - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, @@ -1581,164 +1624,114 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) { const __be32 *bp; char *p; + u32 size; int ret; _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: - call->offset = 0; call->unmarshall++; + afs_extract_to_buf(call, 12 * 4); /* extract the returned status record */ case 1: _debug("extract status"); - ret = afs_extract_data(call, call->buffer, - 12 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; bp = call->buffer; xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]); - call->offset = 0; call->unmarshall++; + afs_extract_to_tmp(call); /* extract the volume name length */ case 2: - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG); - call->offset = 0; + return afs_protocol_error(call, -EBADMSG, + afs_eproto_volname_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); call->unmarshall++; /* extract the volume name */ case 3: _debug("extract volname"); - if (call->count > 0) { - ret = afs_extract_data(call, call->reply[2], - call->count, true); - if (ret < 0) - return ret; - } + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; p = call->reply[2]; p[call->count] = 0; _debug("volname '%s'", p); - - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; - /* extract the volume name padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_volname_padding; - } - call->count = 4 - (call->count & 3); - - case 4: - ret = afs_extract_data(call, call->buffer, - call->count, true); - if (ret < 0) - return ret; - - call->offset = 0; - call->unmarshall++; - no_volname_padding: - /* extract the offline message length */ - case 5: - ret = afs_extract_data(call, &call->tmp, 4, true); + case 4: + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG); - call->offset = 0; + return afs_protocol_error(call, -EBADMSG, + afs_eproto_offline_msg_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); call->unmarshall++; /* extract the offline message */ - case 6: + case 5: _debug("extract offline"); - if (call->count > 0) { - ret = afs_extract_data(call, call->reply[2], - call->count, true); - if (ret < 0) - return ret; - } + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; p = call->reply[2]; p[call->count] = 0; _debug("offline '%s'", p); - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; - /* extract the offline message padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_offline_padding; - } - call->count = 4 - (call->count & 3); - - case 7: - ret = afs_extract_data(call, call->buffer, - call->count, true); - if (ret < 0) - return ret; - - call->offset = 0; - call->unmarshall++; - no_offline_padding: - /* extract the message of the day length */ - case 8: - ret = afs_extract_data(call, &call->tmp, 4, true); + case 6: + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG); - call->offset = 0; + return afs_protocol_error(call, -EBADMSG, + afs_eproto_motd_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); call->unmarshall++; /* extract the message of the day */ - case 9: + case 7: _debug("extract motd"); - if (call->count > 0) { - ret = afs_extract_data(call, call->reply[2], - call->count, true); - if (ret < 0) - return ret; - } + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; p = call->reply[2]; p[call->count] = 0; _debug("motd '%s'", p); - call->offset = 0; call->unmarshall++; - /* extract the message of the day padding */ - call->count = (4 - (call->count & 3)) & 3; - - case 10: - ret = afs_extract_data(call, call->buffer, - call->count, false); - if (ret < 0) - return ret; - - call->offset = 0; - call->unmarshall++; - case 11: + case 8: break; } @@ -1778,6 +1771,9 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, __be32 *bp; void *tmpbuf; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_get_volume_status(fc, vs); + _enter(""); tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); @@ -1867,6 +1863,9 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_set_lock(fc, type); + _enter(""); call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4); @@ -1899,6 +1898,9 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_extend_lock(fc); + _enter(""); call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); @@ -1930,6 +1932,9 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_release_lock(fc); + _enter(""); call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); @@ -2004,19 +2009,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) u32 count; int ret; - _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + _enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter)); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* Extract the capabilities word count */ case 1: - ret = afs_extract_data(call, &call->tmp, - 1 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2024,24 +2026,17 @@ again: call->count = count; call->count2 = count; - call->offset = 0; + iov_iter_discard(&call->iter, READ, count * sizeof(__be32)); call->unmarshall++; /* Extract capabilities words */ case 2: - count = min(call->count, 16U); - ret = afs_extract_data(call, call->buffer, - count * sizeof(__be32), - call->count > 16); + ret = afs_extract_data(call, false); if (ret < 0) return ret; /* TODO: Examine capabilities */ - call->count -= count; - if (call->count > 0) - goto again; - call->offset = 0; call->unmarshall++; break; } @@ -2050,6 +2045,14 @@ again: return 0; } +static void afs_destroy_fs_get_capabilities(struct afs_call *call) +{ + struct afs_server *server = call->reply[0]; + + afs_put_server(call->net, server); + afs_flat_call_destructor(call); +} + /* * FS.GetCapabilities operation type */ @@ -2057,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { .name = "FS.GetCapabilities", .op = afs_FS_GetCapabilities, .deliver = afs_deliver_fs_get_capabilities, - .destructor = afs_flat_call_destructor, + .done = afs_fileserver_probe_result, + .destructor = afs_destroy_fs_get_capabilities, }; /* @@ -2067,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { int afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, struct afs_addr_cursor *ac, - struct key *key) + struct key *key, + unsigned int server_index, + bool async) { struct afs_call *call; __be32 *bp; @@ -2079,6 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net, return -ENOMEM; call->key = key; + call->reply[0] = afs_get_server(server); + call->reply[1] = (void *)(long)server_index; + call->upgrade = true; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -2086,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net, /* Can't take a ref on server */ trace_afs_make_fs_call(call, NULL); - return afs_make_call(ac, call, GFP_NOFS, false); + return afs_make_call(ac, call, GFP_NOFS, async); } /* @@ -2097,7 +2107,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) struct afs_file_status *status = call->reply[1]; struct afs_callback *callback = call->reply[2]; struct afs_volsync *volsync = call->reply[3]; - struct afs_vnode *vnode = call->reply[0]; + struct afs_fid *fid = call->reply[0]; const __be32 *bp; int ret; @@ -2105,21 +2115,16 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) if (ret < 0) return ret; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", fid->vid, fid->vnode); /* unmarshall the reply once we've received all of it */ bp = call->buffer; - afs_decode_status(call, &bp, status, vnode, - &call->expected_version, NULL); - callback[call->count].version = ntohl(bp[0]); - callback[call->count].expiry = ntohl(bp[1]); - callback[call->count].type = ntohl(bp[2]); - if (vnode) - xdr_decode_AFSCallBack(call, vnode, &bp); - else - bp += 3; - if (volsync) - xdr_decode_AFSVolSync(&bp, volsync); + ret = afs_decode_status(call, &bp, status, NULL, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_AFSCallBack_raw(call, &bp, callback); + xdr_decode_AFSVolSync(&bp, volsync); _leave(" = 0 [done]"); return 0; @@ -2148,7 +2153,10 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, struct afs_call *call; __be32 *bp; - _enter(",%x,{%x:%u},,", + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync); + + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), fid->vid, fid->vnode); call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); @@ -2158,11 +2166,12 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, } call->key = fc->key; - call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[0] = fid; call->reply[1] = status; call->reply[2] = callback; call->reply[3] = volsync; call->expected_version = 1; /* vnode->status.data_version */ + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -2193,38 +2202,40 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* Extract the file status count and array in two steps */ case 1: _debug("extract status count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; tmp = ntohl(call->tmp); _debug("status count: %u/%u", tmp, call->count2); if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_count); call->count = 0; call->unmarshall++; more_counts: - call->offset = 0; + afs_extract_to_buf(call, 21 * sizeof(__be32)); case 2: _debug("extract status array %u", call->count); - ret = afs_extract_data(call, call->buffer, 21 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; bp = call->buffer; statuses = call->reply[1]; - if (afs_decode_status(call, &bp, &statuses[call->count], - call->count == 0 ? vnode : NULL, - NULL, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL); + if (ret < 0) + return ret; call->count++; if (call->count < call->count2) @@ -2232,27 +2243,28 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->count = 0; call->unmarshall++; - call->offset = 0; + afs_extract_to_tmp(call); /* Extract the callback count and array in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; tmp = ntohl(call->tmp); _debug("CB count: %u", tmp); if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_cb_count); call->count = 0; call->unmarshall++; more_cbs: - call->offset = 0; + afs_extract_to_buf(call, 3 * sizeof(__be32)); case 4: _debug("extract CB array"); - ret = afs_extract_data(call, call->buffer, 3 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2260,7 +2272,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; callbacks = call->reply[2]; callbacks[call->count].version = ntohl(bp[0]); - callbacks[call->count].expiry = ntohl(bp[1]); + callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1])); callbacks[call->count].type = ntohl(bp[2]); statuses = call->reply[1]; if (call->count == 0 && vnode && statuses[0].abort_code == 0) @@ -2269,19 +2281,17 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) if (call->count < call->count2) goto more_cbs; - call->offset = 0; + afs_extract_to_buf(call, 6 * sizeof(__be32)); call->unmarshall++; case 5: - ret = afs_extract_data(call, call->buffer, 6 * 4, false); + ret = afs_extract_data(call, false); if (ret < 0) return ret; bp = call->buffer; - if (call->reply[3]) - xdr_decode_AFSVolSync(&bp, call->reply[3]); + xdr_decode_AFSVolSync(&bp, call->reply[3]); - call->offset = 0; call->unmarshall++; case 6: @@ -2317,7 +2327,11 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, __be32 *bp; int i; - _enter(",%x,{%x:%u},%u", + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks, + nr_fids, volsync); + + _enter(",%x,{%llx:%llu},%u", key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus, @@ -2334,6 +2348,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, call->reply[2] = callbacks; call->reply[3] = volsync; call->count2 = nr_fids; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/inode.c b/fs/afs/inode.c @@ -82,7 +82,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key) default: printk("kAFS: AFS vnode with undefined type\n"); read_sequnlock_excl(&vnode->cb_lock); - return afs_protocol_error(NULL, -EBADMSG); + return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } inode->i_blocks = 0; @@ -100,7 +100,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) struct afs_fs_cursor fc; int ret; - _enter("%s,{%x:%u.%u,S=%lx}", + _enter("%s,{%llx:%llu.%u,S=%lx}", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, vnode->flags); @@ -127,9 +127,9 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) int afs_iget5_test(struct inode *inode, void *opaque) { struct afs_iget_data *data = opaque; + struct afs_vnode *vnode = AFS_FS_I(inode); - return inode->i_ino == data->fid.vnode && - inode->i_generation == data->fid.unique; + return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0; } /* @@ -150,11 +150,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque) struct afs_iget_data *data = opaque; struct afs_vnode *vnode = AFS_FS_I(inode); - inode->i_ino = data->fid.vnode; - inode->i_generation = data->fid.unique; vnode->fid = data->fid; vnode->volume = data->volume; + /* YFS supports 96-bit vnode IDs, but Linux only supports + * 64-bit inode numbers. + */ + inode->i_ino = data->fid.vnode; + inode->i_generation = data->fid.unique; return 0; } @@ -193,7 +196,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) return ERR_PTR(-ENOMEM); } - _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }", + _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", inode, inode->i_ino, data.fid.vid, data.fid.vnode, data.fid.unique); @@ -252,8 +255,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode) key.vnode_id = vnode->fid.vnode; key.unique = vnode->fid.unique; - key.vnode_id_ext[0] = 0; - key.vnode_id_ext[1] = 0; + key.vnode_id_ext[0] = vnode->fid.vnode >> 32; + key.vnode_id_ext[1] = vnode->fid.vnode_hi; aux.data_version = vnode->status.data_version; vnode->cache = fscache_acquire_cookie(vnode->volume->cache, @@ -277,7 +280,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, struct inode *inode; int ret; - _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique); + _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique); as = sb->s_fs_info; data.volume = as->volume; @@ -289,7 +292,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, return ERR_PTR(-ENOMEM); } - _debug("GOT INODE %p { vl=%x vn=%x, u=%x }", + _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }", inode, fid->vid, fid->vnode, fid->unique); vnode = AFS_FS_I(inode); @@ -314,11 +317,11 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, * didn't give us a callback) */ vnode->cb_version = 0; vnode->cb_type = 0; - vnode->cb_expires_at = 0; + vnode->cb_expires_at = ktime_get(); } else { vnode->cb_version = cb->version; vnode->cb_type = cb->type; - vnode->cb_expires_at = cb->expiry; + vnode->cb_expires_at = cb->expires_at; vnode->cb_interest = afs_get_cb_interest(cbi); set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } @@ -352,7 +355,7 @@ bad_inode: */ void afs_zap_data(struct afs_vnode *vnode) { - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); #ifdef CONFIG_AFS_FSCACHE fscache_invalidate(vnode->cache); @@ -382,7 +385,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) bool valid = false; int ret; - _enter("{v={%x:%u} fl=%lx},%x", + _enter("{v={%llx:%llu} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); @@ -501,7 +504,7 @@ void afs_evict_inode(struct inode *inode) vnode = AFS_FS_I(inode); - _enter("{%x:%u.%d}", + _enter("{%llx:%llu.%d}", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); @@ -550,7 +553,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) struct key *key; int ret; - _enter("{%x:%u},{n=%pd},%x", + _enter("{%llx:%llu},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); diff --git a/fs/afs/internal.h b/fs/afs/internal.h @@ -22,6 +22,7 @@ #include <linux/backing-dev.h> #include <linux/uuid.h> #include <linux/mm_types.h> +#include <linux/dns_resolver.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> @@ -75,10 +76,13 @@ struct afs_addr_list { u32 version; /* Version */ unsigned char max_addrs; unsigned char nr_addrs; - unsigned char index; /* Address currently in use */ + unsigned char preferred; /* Preferred address */ unsigned char nr_ipv4; /* Number of IPv4 addresses */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; unsigned long probed; /* Mask of servers that have been probed */ - unsigned long yfs; /* Mask of servers that are YFS */ + unsigned long failed; /* Mask of addrs that failed locally/ICMP */ + unsigned long responded; /* Mask of addrs that responded */ struct sockaddr_rxrpc addrs[]; #define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) }; @@ -88,6 +92,7 @@ struct afs_addr_list { */ struct afs_call { const struct afs_call_type *type; /* type of call */ + struct afs_addr_list *alist; /* Address is alist[addr_ix] */ wait_queue_head_t waitq; /* processes awaiting completion */ struct work_struct async_work; /* async I/O processor */ struct work_struct work; /* actual work processor */ @@ -98,16 +103,22 @@ struct afs_call { struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ struct address_space *mapping; /* Pages being written from */ + struct iov_iter iter; /* Buffer iterator */ + struct iov_iter *_iter; /* Iterator currently in use */ + union { /* Convenience for ->iter */ + struct kvec kvec[1]; + struct bio_vec bvec[1]; + }; void *buffer; /* reply receive buffer */ void *reply[4]; /* Where to put the reply */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ - size_t offset; /* offset into received data store */ atomic_t usage; enum afs_call_state state; spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ + u32 epoch; unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ @@ -117,19 +128,28 @@ struct afs_call { unsigned count2; /* count used in unmarshalling */ }; unsigned char unmarshall; /* unmarshalling phase */ + unsigned char addr_ix; /* Address in ->alist */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ + bool want_reply_time; /* T if want reply_time */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ - __be32 tmp; /* place to extract temporary data */ + union { /* place to extract temporary data */ + struct { + __be32 tmp_u; + __be32 tmp; + } __attribute__((packed)); + __be64 tmp64; + }; afs_dataversion_t expected_version; /* Updated version expected from store */ afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */ + ktime_t reply_time; /* Time of first reply packet */ }; struct afs_call_type { @@ -146,6 +166,9 @@ struct afs_call_type { /* Work function */ void (*work)(struct work_struct *work); + + /* Call done function (gets called immediately on success or failure) */ + void (*done)(struct afs_call *call); }; /* @@ -185,6 +208,7 @@ struct afs_read { refcount_t usage; unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; + unsigned int offset; /* offset into current page */ void (*page_done)(struct afs_call *, struct afs_read *); struct page **pages; struct page *array[]; @@ -343,13 +367,70 @@ struct afs_cell { rwlock_t proc_lock; /* VL server list. */ - rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ - struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ + rwlock_t vl_servers_lock; /* Lock on vl_servers */ + struct afs_vlserver_list __rcu *vl_servers; + u8 name_len; /* Length of name */ char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ }; /* + * Volume Location server record. + */ +struct afs_vlserver { + struct rcu_head rcu; + struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */ + unsigned long flags; +#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ +#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ +#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */ + rwlock_t lock; /* Lock on addresses */ + atomic_t usage; + + /* Probe state */ + wait_queue_head_t probe_wq; + atomic_t probe_outstanding; + spinlock_t probe_lock; + struct { + unsigned int rtt; /* RTT as ktime/64 */ + u32 abort_code; + short error; + bool have_result; + bool responded:1; + bool is_yfs:1; + bool not_yfs:1; + bool local_failure:1; + } probe; + + u16 port; + u16 name_len; /* Length of name */ + char name[]; /* Server name, case-flattened */ +}; + +/* + * Weighted list of Volume Location servers. + */ +struct afs_vlserver_entry { + u16 priority; /* Preference (as SRV) */ + u16 weight; /* Weight (as SRV) */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + struct afs_vlserver *server; +}; + +struct afs_vlserver_list { + struct rcu_head rcu; + atomic_t usage; + u8 nr_servers; + u8 index; /* Server currently in use */ + u8 preferred; /* Preferred server */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + rwlock_t lock; + struct afs_vlserver_entry servers[]; +}; + +/* * Cached VLDB entry. * * This is pointed to by cell->vldb_entries, indexed by name. @@ -403,8 +484,12 @@ struct afs_server { #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ #define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ #define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ +#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ +#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ +#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->epoch is valid */ atomic_t usage; u32 addr_version; /* Address list version */ + u32 cm_epoch; /* Server RxRPC epoch */ /* file service access */ rwlock_t fs_lock; /* access lock */ @@ -413,6 +498,26 @@ struct afs_server { struct hlist_head cb_volumes; /* List of volume interests on this server */ unsigned cb_s_break; /* Break-everything counter. */ rwlock_t cb_break_lock; /* Volume finding lock */ + + /* Probe state */ + wait_queue_head_t probe_wq; + atomic_t probe_outstanding; + spinlock_t probe_lock; + struct { + unsigned int rtt; /* RTT as ktime/64 */ + u32 abort_code; + u32 cm_epoch; + short error; + bool have_result; + bool responded:1; + bool is_yfs:1; + bool not_yfs:1; + bool local_failure:1; + bool no_epoch:1; + bool cm_probed:1; + bool said_rebooted:1; + bool said_inconsistent:1; + } probe; }; /* @@ -447,8 +552,8 @@ struct afs_server_entry { struct afs_server_list { refcount_t usage; - unsigned short nr_servers; - unsigned short index; /* Server currently in use */ + unsigned char nr_servers; + unsigned char preferred; /* Preferred server */ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ unsigned int seq; /* Set to ->servers_seq when installed */ rwlock_t lock; @@ -550,6 +655,15 @@ struct afs_vnode { afs_callback_type_t cb_type; /* type of callback */ }; +static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) +{ +#ifdef CONFIG_AFS_FSCACHE + return vnode->cache; +#else + return NULL; +#endif +} + /* * cached security record for one user's attempt to access a vnode */ @@ -586,13 +700,31 @@ struct afs_interface { */ struct afs_addr_cursor { struct afs_addr_list *alist; /* Current address list (pins ref) */ - struct sockaddr_rxrpc *addr; + unsigned long tried; /* Tried addresses */ + signed char index; /* Current address */ + bool responded; /* T if the current address responded */ + unsigned short nr_iterations; /* Number of address iterations */ + short error; u32 abort_code; - unsigned short start; /* Starting point in alist->addrs[] */ - unsigned short index; /* Wrapping offset from start to current addr */ +}; + +/* + * Cursor for iterating over a set of volume location servers. + */ +struct afs_vl_cursor { + struct afs_addr_cursor ac; + struct afs_cell *cell; /* The cell we're querying */ + struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ + struct afs_vlserver *server; /* Server on which this resides */ + struct key *key; /* Key for the server */ + unsigned long untried; /* Bitmask of untried servers */ + short index; /* Current server */ short error; - bool begun; /* T if we've begun iteration */ - bool responded; /* T if the current address responded */ + unsigned short flags; +#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */ +#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */ + unsigned short nr_iterations; /* Number of server iterations */ }; /* @@ -604,10 +736,11 @@ struct afs_fs_cursor { struct afs_server_list *server_list; /* Current server list (pins ref) */ struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ struct key *key; /* Key for the server */ + unsigned long untried; /* Bitmask of untried servers */ unsigned int cb_break; /* cb_break + cb_s_break before the call */ unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ - unsigned char start; /* Initial index in server list */ - unsigned char index; /* Number of servers tried beyond start */ + short index; /* Current server */ + short error; unsigned short flags; #define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ #define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */ @@ -615,6 +748,7 @@ struct afs_fs_cursor { #define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ #define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ #define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ + unsigned short nr_iterations; /* Number of server iterations */ }; /* @@ -640,12 +774,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, unsigned short, unsigned short); extern void afs_put_addrlist(struct afs_addr_list *); -extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char, - unsigned short, unsigned short); -extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *); +extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, + const char *, size_t, char, + unsigned short, unsigned short); +extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *); extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); -extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); @@ -668,6 +802,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; * callback.c */ extern void afs_init_callback_state(struct afs_server *); +extern void __afs_break_callback(struct afs_vnode *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); @@ -688,10 +823,13 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; } -static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode, - struct afs_cb_interest *cbi) +static inline bool afs_cb_is_broken(unsigned int cb_break, + const struct afs_vnode *vnode, + const struct afs_cb_interest *cbi) { - return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break; + return !cbi || cb_break != (vnode->cb_break + + cbi->server->cb_s_break + + vnode->volume->cb_v_break); } /* @@ -781,7 +919,7 @@ extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64, struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64); +extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64); extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, struct afs_fid *, struct afs_file_status *); @@ -797,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *); extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, - struct afs_addr_cursor *, struct key *); + struct afs_addr_cursor *, struct key *, unsigned int, bool); extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, struct afs_fid *, struct afs_file_status *, struct afs_callback *, unsigned int, @@ -807,6 +945,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, struct afs_callback *, struct afs_volsync *); /* + * fs_probe.c + */ +extern void afs_fileserver_probe_result(struct afs_call *); +extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *); +extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); + +/* * inode.c */ extern int afs_fetch_status(struct afs_vnode *, struct key *, bool); @@ -922,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *); extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); -extern int afs_queue_call_work(struct afs_call *); extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, @@ -930,12 +1074,39 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *, extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); -extern int afs_extract_data(struct afs_call *, void *, size_t, bool); -extern int afs_protocol_error(struct afs_call *, int); +extern int afs_extract_data(struct afs_call *, bool); +extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause); + +static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) +{ + call->kvec[0].iov_base = buf; + call->kvec[0].iov_len = size; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, size); +} + +static inline void afs_extract_to_tmp(struct afs_call *call) +{ + afs_extract_begin(call, &call->tmp, sizeof(call->tmp)); +} + +static inline void afs_extract_to_tmp64(struct afs_call *call) +{ + afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64)); +} + +static inline void afs_extract_discard(struct afs_call *call, size_t size) +{ + iov_iter_discard(&call->iter, READ, size); +} + +static inline void afs_extract_to_buf(struct afs_call *call, size_t size) +{ + afs_extract_begin(call, call->buffer, size); +} static inline int afs_transfer_reply(struct afs_call *call) { - return afs_extract_data(call, call->buffer, call->reply_max, false); + return afs_extract_data(call, false); } static inline bool afs_check_call_state(struct afs_call *call, @@ -1012,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *); extern void afs_manage_servers(struct work_struct *); extern void afs_servers_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); -extern bool afs_probe_fileserver(struct afs_fs_cursor *); extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); /* @@ -1039,14 +1209,51 @@ extern void afs_fs_exit(void); /* * vlclient.c */ -extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, - struct afs_addr_cursor *, - struct key *, const char *, int); -extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, - struct key *, const uuid_t *); -extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); -extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *, - struct key *, const uuid_t *); +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, + const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); +extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *, + struct afs_vlserver *, unsigned int, bool); +extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); + +/* + * vl_probe.c + */ +extern void afs_vlserver_probe_result(struct afs_call *); +extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *); +extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long); + +/* + * vl_rotate.c + */ +extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *, + struct afs_cell *, struct key *); +extern bool afs_select_vlserver(struct afs_vl_cursor *); +extern bool afs_select_current_vlserver(struct afs_vl_cursor *); +extern int afs_end_vlserver_operation(struct afs_vl_cursor *); + +/* + * vlserver_list.c + */ +static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver) +{ + atomic_inc(&vlserver->usage); + return vlserver; +} + +static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist) +{ + if (vllist) + atomic_inc(&vllist->usage); + return vllist; +} + +extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short); +extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *); +extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int); +extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *); +extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, + const void *, size_t); /* * volume.c @@ -1089,6 +1296,36 @@ extern int afs_launder_page(struct page *); extern const struct xattr_handler *afs_xattr_handlers[]; extern ssize_t afs_listxattr(struct dentry *, char *, size_t); +/* + * yfsclient.c + */ +extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool); +extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); +extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); +extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64); +extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); +extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, + struct afs_fid *, struct afs_file_status *); +extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, + struct afs_vnode *, const char *, u64, u64); +extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *, + pgoff_t, pgoff_t, unsigned, unsigned); +extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *); +extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); +extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); +extern int yfs_fs_extend_lock(struct afs_fs_cursor *); +extern int yfs_fs_release_lock(struct afs_fs_cursor *); +extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, struct afs_volsync *); +extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, unsigned int, + struct afs_volsync *); /* * Miscellaneous inline functions. @@ -1120,6 +1357,17 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, } } +static inline int afs_io_error(struct afs_call *call, enum afs_io_error where) +{ + trace_afs_io_error(call->debug_id, -EIO, where); + return -EIO; +} + +static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where) +{ + trace_afs_file_error(vnode, -EIO, where); + return -EIO; +} /*****************************************************************************/ /* diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c @@ -130,9 +130,10 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) goto error_no_page; } - ret = -EIO; - if (PageError(page)) + if (PageError(page)) { + ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); goto error; + } buf = kmap_atomic(page); memcpy(devname, buf, size); diff --git a/fs/afs/proc.c b/fs/afs/proc.c @@ -17,6 +17,11 @@ #include <linux/uaccess.h> #include "internal.h" +struct afs_vl_seq_net_private { + struct seq_net_private seq; /* Must be first */ + struct afs_vlserver_list *vllist; +}; + static inline struct afs_net *afs_seq2net(struct seq_file *m) { return afs_net(seq_file_net(m)); @@ -32,16 +37,24 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m) */ static int afs_proc_cells_show(struct seq_file *m, void *v) { - struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); + struct afs_vlserver_list *vllist; + struct afs_cell *cell; if (v == SEQ_START_TOKEN) { /* display header on line 1 */ - seq_puts(m, "USE NAME\n"); + seq_puts(m, "USE TTL SV NAME\n"); return 0; } + cell = list_entry(v, struct afs_cell, proc_link); + vllist = rcu_dereference(cell->vl_servers); + /* display one cell per line on subsequent lines */ - seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); + seq_printf(m, "%3u %6lld %2u %s\n", + atomic_read(&cell->usage), + cell->dns_expiry - ktime_get_real_seconds(), + vllist ? vllist->nr_servers : 0, + cell->name); return 0; } @@ -208,7 +221,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) return 0; } - seq_printf(m, "%3d %08x %s\n", + seq_printf(m, "%3d %08llx %s\n", atomic_read(&vol->usage), vol->vid, afs_vol_types[vol->type]); @@ -247,61 +260,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = { .show = afs_proc_cell_volumes_show, }; +static const char *const dns_record_sources[NR__dns_record_source + 1] = { + [DNS_RECORD_UNAVAILABLE] = "unav", + [DNS_RECORD_FROM_CONFIG] = "cfg", + [DNS_RECORD_FROM_DNS_A] = "A", + [DNS_RECORD_FROM_DNS_AFSDB] = "AFSDB", + [DNS_RECORD_FROM_DNS_SRV] = "SRV", + [DNS_RECORD_FROM_NSS] = "nss", + [NR__dns_record_source] = "[weird]" +}; + +static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = { + [DNS_LOOKUP_NOT_DONE] = "no-lookup", + [DNS_LOOKUP_GOOD] = "good", + [DNS_LOOKUP_GOOD_WITH_BAD] = "good/bad", + [DNS_LOOKUP_BAD] = "bad", + [DNS_LOOKUP_GOT_NOT_FOUND] = "not-found", + [DNS_LOOKUP_GOT_LOCAL_FAILURE] = "local-failure", + [DNS_LOOKUP_GOT_TEMP_FAILURE] = "temp-failure", + [DNS_LOOKUP_GOT_NS_FAILURE] = "ns-failure", + [NR__dns_lookup_status] = "[weird]" +}; + /* * Display the list of Volume Location servers we're using for a cell. */ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct sockaddr_rxrpc *addr = v; + const struct afs_vl_seq_net_private *priv = m->private; + const struct afs_vlserver_list *vllist = priv->vllist; + const struct afs_vlserver_entry *entry; + const struct afs_vlserver *vlserver; + const struct afs_addr_list *alist; + int i; - /* display header on line 1 */ - if (v == (void *)1) { - seq_puts(m, "ADDRESS\n"); + if (v == SEQ_START_TOKEN) { + seq_printf(m, "# source %s, status %s\n", + dns_record_sources[vllist->source], + dns_lookup_statuses[vllist->status]); return 0; } - /* display one cell per line on subsequent lines */ - seq_printf(m, "%pISp\n", &addr->transport); + entry = v; + vlserver = entry->server; + alist = rcu_dereference(vlserver->addresses); + + seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n", + vlserver->name, entry->priority, entry->weight, + dns_record_sources[alist ? alist->source : entry->source], + dns_lookup_statuses[alist ? alist->status : entry->status]); + if (alist) { + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, " %c %pISpc\n", + alist->preferred == i ? '>' : '-', + &alist->addrs[i].transport); + } return 0; } static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_addr_list *alist; + struct afs_vl_seq_net_private *priv = m->private; + struct afs_vlserver_list *vllist; struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); - alist = rcu_dereference(cell->vl_addrs); + vllist = rcu_dereference(cell->vl_servers); + priv->vllist = vllist; - /* allow for the header line */ - if (!pos) - return (void *) 1; - pos--; + if (pos < 0) + *_pos = pos = 0; + if (pos == 0) + return SEQ_START_TOKEN; - if (!alist || pos >= alist->nr_addrs) + if (!vllist || pos - 1 >= vllist->nr_servers) return NULL; - return alist->addrs + pos; + return &vllist->servers[pos - 1]; } static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_addr_list *alist; - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_vl_seq_net_private *priv = m->private; + struct afs_vlserver_list *vllist = priv->vllist; loff_t pos; - alist = rcu_dereference(cell->vl_addrs); - pos = *_pos; - (*_pos)++; - if (!alist || pos >= alist->nr_addrs) + pos++; + *_pos = pos; + if (!vllist || pos - 1 >= vllist->nr_servers) return NULL; - return alist->addrs + pos; + return &vllist->servers[pos - 1]; } static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) @@ -337,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) &server->uuid, atomic_read(&server->usage), &alist->addrs[0].transport, - alist->index == 0 ? "*" : ""); + alist->preferred == 0 ? "*" : ""); for (i = 1; i < alist->nr_addrs; i++) seq_printf(m, " %pISpc%s\n", &alist->addrs[i].transport, - alist->index == i ? "*" : ""); + alist->preferred == i ? "*" : ""); return 0; } @@ -562,7 +616,7 @@ int afs_proc_cell_setup(struct afs_cell *cell) if (!proc_create_net_data("vlservers", 0444, dir, &afs_proc_cell_vlservers_ops, - sizeof(struct seq_net_private), + sizeof(struct afs_vl_seq_net_private), cell) || !proc_create_net_data("volumes", 0444, dir, &afs_proc_cell_volumes_ops, diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h @@ -0,0 +1,163 @@ +/* YFS protocol bits + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define YFS_FS_SERVICE 2500 +#define YFS_CM_SERVICE 2501 + +#define YFSCBMAX 1024 + +enum YFS_CM_Operations { + YFSCBProbe = 206, /* probe client */ + YFSCBGetLock = 207, /* get contents of CM lock table */ + YFSCBXStatsVersion = 209, /* get version of extended statistics */ + YFSCBGetXStats = 210, /* get contents of extended statistics data */ + YFSCBInitCallBackState3 = 213, /* initialise callback state, version 3 */ + YFSCBProbeUuid = 214, /* check the client hasn't rebooted */ + YFSCBGetServerPrefs = 215, + YFSCBGetCellServDV = 216, + YFSCBGetLocalCell = 217, + YFSCBGetCacheConfig = 218, + YFSCBGetCellByNum = 65537, + YFSCBTellMeAboutYourself = 65538, /* get client capabilities */ + YFSCBCallBack = 64204, +}; + +enum YFS_FS_Operations { + YFSFETCHACL = 64131, /* YFS Fetch file ACL */ + YFSFETCHSTATUS = 64132, /* YFS Fetch file status */ + YFSSTOREACL = 64134, /* YFS Store file ACL */ + YFSSTORESTATUS = 64135, /* YFS Store file status */ + YFSREMOVEFILE = 64136, /* YFS Remove a file */ + YFSCREATEFILE = 64137, /* YFS Create a file */ + YFSRENAME = 64138, /* YFS Rename or move a file or directory */ + YFSSYMLINK = 64139, /* YFS Create a symbolic link */ + YFSLINK = 64140, /* YFS Create a hard link */ + YFSMAKEDIR = 64141, /* YFS Create a directory */ + YFSREMOVEDIR = 64142, /* YFS Remove a directory */ + YFSGETVOLUMESTATUS = 64149, /* YFS Get volume status information */ + YFSSETVOLUMESTATUS = 64150, /* YFS Set volume status information */ + YFSSETLOCK = 64156, /* YFS Request a file lock */ + YFSEXTENDLOCK = 64157, /* YFS Extend a file lock */ + YFSRELEASELOCK = 64158, /* YFS Release a file lock */ + YFSLOOKUP = 64161, /* YFS lookup file in directory */ + YFSFLUSHCPS = 64165, + YFSFETCHOPAQUEACL = 64168, + YFSWHOAMI = 64170, + YFSREMOVEACL = 64171, + YFSREMOVEFILE2 = 64173, + YFSSTOREOPAQUEACL2 = 64174, + YFSINLINEBULKSTATUS = 64536, /* YFS Fetch multiple file statuses with errors */ + YFSFETCHDATA64 = 64537, /* YFS Fetch file data */ + YFSSTOREDATA64 = 64538, /* YFS Store file data */ + YFSUPDATESYMLINK = 64540, +}; + +struct yfs_xdr_u64 { + __be32 msw; + __be32 lsw; +} __packed; + +static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x) +{ + return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw); +} + +static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x) +{ + return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) }; +} + +struct yfs_xdr_vnode { + struct yfs_xdr_u64 lo; + __be32 hi; + __be32 unique; +} __packed; + +struct yfs_xdr_YFSFid { + struct yfs_xdr_u64 volume; + struct yfs_xdr_vnode vnode; +} __packed; + + +struct yfs_xdr_YFSFetchStatus { + __be32 type; + __be32 nlink; + struct yfs_xdr_u64 size; + struct yfs_xdr_u64 data_version; + struct yfs_xdr_u64 author; + struct yfs_xdr_u64 owner; + struct yfs_xdr_u64 group; + __be32 mode; + __be32 caller_access; + __be32 anon_access; + struct yfs_xdr_vnode parent; + __be32 data_access_protocol; + struct yfs_xdr_u64 mtime_client; + struct yfs_xdr_u64 mtime_server; + __be32 lock_count; + __be32 abort_code; +} __packed; + +struct yfs_xdr_YFSCallBack { + __be32 version; + struct yfs_xdr_u64 expiration_time; + __be32 type; +} __packed; + +struct yfs_xdr_YFSStoreStatus { + __be32 mask; + __be32 mode; + struct yfs_xdr_u64 mtime_client; + struct yfs_xdr_u64 owner; + struct yfs_xdr_u64 group; +} __packed; + +struct yfs_xdr_RPCFlags { + __be32 rpc_flags; +} __packed; + +struct yfs_xdr_YFSVolSync { + struct yfs_xdr_u64 vol_creation_date; + struct yfs_xdr_u64 vol_update_date; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 blocks_in_use; + struct yfs_xdr_u64 blocks_avail; +} __packed; + +enum yfs_volume_type { + yfs_volume_type_ro = 0, + yfs_volume_type_rw = 1, +}; + +#define yfs_FVSOnline 0x1 +#define yfs_FVSInservice 0x2 +#define yfs_FVSBlessed 0x4 +#define yfs_FVSNeedsSalvage 0x8 + +struct yfs_xdr_YFSFetchVolumeStatus { + struct yfs_xdr_u64 vid; + struct yfs_xdr_u64 parent_id; + __be32 flags; + __be32 type; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 blocks_in_use; + struct yfs_xdr_u64 part_blocks_avail; + struct yfs_xdr_u64 part_max_blocks; + struct yfs_xdr_u64 vol_copy_date; + struct yfs_xdr_u64 vol_backup_date; +} __packed; + +struct yfs_xdr_YFSStoreVolumeStatus { + __be32 mask; + struct yfs_xdr_u64 min_quota; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 file_quota; +} __packed; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c @@ -19,14 +19,6 @@ #include "afs_fs.h" /* - * Initialise a filesystem server cursor for iterating over FS servers. - */ -static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - memset(fc, 0, sizeof(*fc)); -} - -/* * Begin an operation on the fileserver. * * Fileserver operations are serialised on the server by vnode, so we serialise @@ -35,13 +27,14 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, struct key *key) { - afs_init_fs_cursor(fc, vnode); + memset(fc, 0, sizeof(*fc)); fc->vnode = vnode; fc->key = key; fc->ac.error = SHRT_MAX; + fc->error = -EDESTADDRREQ; if (mutex_lock_interruptible(&vnode->io_lock) < 0) { - fc->ac.error = -EINTR; + fc->error = -EINTR; fc->flags |= AFS_FS_CURSOR_STOP; return false; } @@ -65,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, fc->server_list = afs_get_serverlist(vnode->volume->servers); read_unlock(&vnode->volume->servers_lock); + fc->untried = (1UL << fc->server_list->nr_servers) - 1; + fc->index = READ_ONCE(fc->server_list->preferred); + cbi = vnode->cb_interest; if (cbi) { /* See if the vnode's preferred record is still available */ for (i = 0; i < fc->server_list->nr_servers; i++) { if (fc->server_list->servers[i].cb_interest == cbi) { - fc->start = i; + fc->index = i; goto found_interest; } } @@ -80,7 +76,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, * and have to return an error. */ if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; return false; } @@ -94,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, afs_put_cb_interest(afs_v2net(vnode), cbi); cbi = NULL; - } else { - fc->start = READ_ONCE(fc->server_list->index); } found_interest: - fc->index = fc->start; return true; } @@ -117,7 +110,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) default: m = "busy"; break; } - pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); + pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m); } /* @@ -127,7 +120,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) { msleep_interruptible(1000); if (signal_pending(current)) { - fc->ac.error = -ERESTARTSYS; + fc->error = -ERESTARTSYS; return false; } @@ -143,27 +136,32 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) struct afs_addr_list *alist; struct afs_server *server; struct afs_vnode *vnode = fc->vnode; + u32 rtt, abort_code; + int error = fc->ac.error, i; - _enter("%u/%u,%u/%u,%d,%d", - fc->index, fc->start, - fc->ac.index, fc->ac.start, - fc->ac.error, fc->ac.abort_code); + _enter("%lx[%d],%lx[%d],%d,%d", + fc->untried, fc->index, + fc->ac.tried, fc->ac.index, + error, fc->ac.abort_code); if (fc->flags & AFS_FS_CURSOR_STOP) { _leave(" = f [stopped]"); return false; } + fc->nr_iterations++; + /* Evaluate the result of the previous operation, if there was one. */ - switch (fc->ac.error) { + switch (error) { case SHRT_MAX: goto start; case 0: default: /* Success or local failure. Stop. */ + fc->error = error; fc->flags |= AFS_FS_CURSOR_STOP; - _leave(" = f [okay/local %d]", fc->ac.error); + _leave(" = f [okay/local %d]", error); return false; case -ECONNABORTED: @@ -178,7 +176,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * - May indicate that the fileserver couldn't attach to the vol. */ if (fc->flags & AFS_FS_CURSOR_VNOVOL) { - fc->ac.error = -EREMOTEIO; + fc->error = -EREMOTEIO; goto next_server; } @@ -187,12 +185,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) write_unlock(&vnode->volume->servers_lock); set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); - if (fc->ac.error < 0) - goto failed; + error = afs_check_volume_status(vnode->volume, fc->key); + if (error < 0) + goto failed_set_error; if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { - fc->ac.error = -ENOMEDIUM; + fc->error = -ENOMEDIUM; goto failed; } @@ -200,7 +198,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * it's the fileserver having trouble. */ if (vnode->volume->servers == fc->server_list) { - fc->ac.error = -EREMOTEIO; + fc->error = -EREMOTEIO; goto next_server; } @@ -215,7 +213,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) case VONLINE: case VDISKFULL: case VOVERQUOTA: - fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + fc->error = afs_abort_to_error(fc->ac.abort_code); goto next_server; case VOFFLINE: @@ -224,11 +222,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); } if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { - fc->ac.error = -EADV; + fc->error = -EADV; goto failed; } if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; goto failed; } goto busy; @@ -240,7 +238,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * have a file lock we need to maintain. */ if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { - fc->ac.error = -EBUSY; + fc->error = -EBUSY; goto failed; } if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { @@ -269,16 +267,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * honour, just in case someone sets up a loop. */ if (fc->flags & AFS_FS_CURSOR_VMOVED) { - fc->ac.error = -EREMOTEIO; + fc->error = -EREMOTEIO; goto failed; } fc->flags |= AFS_FS_CURSOR_VMOVED; set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); - if (fc->ac.error < 0) - goto failed; + error = afs_check_volume_status(vnode->volume, fc->key); + if (error < 0) + goto failed_set_error; /* If the server list didn't change, then the VLDB is * out of sync with the fileservers. This is hopefully @@ -290,7 +288,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * TODO: Retry a few times with sleeps. */ if (vnode->volume->servers == fc->server_list) { - fc->ac.error = -ENOMEDIUM; + fc->error = -ENOMEDIUM; goto failed; } @@ -299,20 +297,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) default: clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); - fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + fc->error = afs_abort_to_error(fc->ac.abort_code); goto failed; } + case -ETIMEDOUT: + case -ETIME: + if (fc->error != -EDESTADDRREQ) + goto iterate_address; + /* Fall through */ case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - case -ETIMEDOUT: - case -ETIME: _debug("no conn"); + fc->error = error; goto iterate_address; case -ECONNRESET: _debug("call reset"); + fc->error = error; goto failed; } @@ -328,15 +331,57 @@ start: /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. */ - fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); - if (fc->ac.error < 0) - goto failed; + error = afs_check_volume_status(vnode->volume, fc->key); + if (error < 0) + goto failed_set_error; if (!afs_start_fs_iteration(fc, vnode)) goto failed; -use_server: - _debug("use"); + _debug("__ VOL %llx __", vnode->volume->vid); + error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list); + if (error < 0) + goto failed_set_error; + +pick_server: + _debug("pick [%lx]", fc->untried); + + error = afs_wait_for_fs_probes(fc->server_list, fc->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. If we have outstanding + * callbacks, we stick with the server we're already using if we can. + */ + if (fc->cbi) { + _debug("cbi %u", fc->index); + if (test_bit(fc->index, &fc->untried)) + goto selected_server; + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + _debug("nocbi"); + } + + fc->index = -1; + rtt = U32_MAX; + for (i = 0; i < fc->server_list->nr_servers; i++) { + struct afs_server *s = fc->server_list->servers[i].server; + + if (!test_bit(i, &fc->untried) || !s->probe.responded) + continue; + if (s->probe.rtt < rtt) { + fc->index = i; + rtt = s->probe.rtt; + } + } + + if (fc->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", fc->index); + __clear_bit(fc->index, &fc->untried); + /* We're starting on a different fileserver from the list. We need to * check it, create a callback intercept, find its address list and * probe its capabilities before we use it. @@ -354,10 +399,10 @@ use_server: * break request before we've finished decoding the reply and * installing the vnode. */ - fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list, - fc->index); - if (fc->ac.error < 0) - goto failed; + error = afs_register_server_cb_interest(vnode, fc->server_list, + fc->index); + if (error < 0) + goto failed_set_error; fc->cbi = afs_get_cb_interest(vnode->cb_interest); @@ -369,66 +414,88 @@ use_server: memset(&fc->ac, 0, sizeof(fc->ac)); - /* Probe the current fileserver if we haven't done so yet. */ - if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { - fc->ac.alist = afs_get_addrlist(alist); - - if (!afs_probe_fileserver(fc)) { - switch (fc->ac.error) { - case -ENOMEM: - case -ERESTARTSYS: - case -EINTR: - goto failed; - default: - goto next_server; - } - } - } - if (!fc->ac.alist) fc->ac.alist = alist; else afs_put_addrlist(alist); - fc->ac.start = READ_ONCE(alist->index); - fc->ac.index = fc->ac.start; + fc->ac.index = -1; iterate_address: ASSERT(fc->ac.alist); - _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ if (!afs_iterate_addresses(&fc->ac)) goto next_server; + _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs); + _leave(" = t"); return true; next_server: _debug("next"); afs_end_cursor(&fc->ac); - afs_put_cb_interest(afs_v2net(vnode), fc->cbi); - fc->cbi = NULL; - fc->index++; - if (fc->index >= fc->server_list->nr_servers) - fc->index = 0; - if (fc->index != fc->start) - goto use_server; + goto pick_server; +no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ if (fc->flags & AFS_FS_CURSOR_VBUSY) goto restart_from_beginning; - fc->ac.error = -EDESTADDRREQ; - goto failed; + abort_code = 0; + error = -EDESTADDRREQ; + for (i = 0; i < fc->server_list->nr_servers; i++) { + struct afs_server *s = fc->server_list->servers[i].server; + int probe_error = READ_ONCE(s->probe.error); + + switch (probe_error) { + case 0: + continue; + default: + if (error == -ETIMEDOUT || + error == -ETIME) + continue; + case -ETIMEDOUT: + case -ETIME: + if (error == -ENOMEM || + error == -ENONET) + continue; + case -ENOMEM: + case -ENONET: + if (error == -ENETUNREACH) + continue; + case -ENETUNREACH: + if (error == -EHOSTUNREACH) + continue; + case -EHOSTUNREACH: + if (error == -ECONNREFUSED) + continue; + case -ECONNREFUSED: + if (error == -ECONNRESET) + continue; + case -ECONNRESET: /* Responded, but call expired. */ + if (error == -ECONNABORTED) + continue; + case -ECONNABORTED: + abort_code = s->probe.abort_code; + error = probe_error; + continue; + } + } + + if (error == -ECONNABORTED) + error = afs_abort_to_error(abort_code); +failed_set_error: + fc->error = error; failed: fc->flags |= AFS_FS_CURSOR_STOP; afs_end_cursor(&fc->ac); - _leave(" = f [failed %d]", fc->ac.error); + _leave(" = f [failed %d]", fc->error); return false; } @@ -442,13 +509,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) struct afs_vnode *vnode = fc->vnode; struct afs_cb_interest *cbi = vnode->cb_interest; struct afs_addr_list *alist; + int error = fc->ac.error; _enter(""); - switch (fc->ac.error) { + switch (error) { case SHRT_MAX: if (!cbi) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; fc->flags |= AFS_FS_CURSOR_STOP; return false; } @@ -461,25 +529,26 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) afs_get_addrlist(alist); read_unlock(&cbi->server->fs_lock); if (!alist) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; fc->flags |= AFS_FS_CURSOR_STOP; return false; } memset(&fc->ac, 0, sizeof(fc->ac)); fc->ac.alist = alist; - fc->ac.start = READ_ONCE(alist->index); - fc->ac.index = fc->ac.start; + fc->ac.index = -1; goto iterate_address; case 0: default: /* Success or local failure. Stop. */ + fc->error = error; fc->flags |= AFS_FS_CURSOR_STOP; - _leave(" = f [okay/local %d]", fc->ac.error); + _leave(" = f [okay/local %d]", error); return false; case -ECONNABORTED: + fc->error = afs_abort_to_error(fc->ac.abort_code); fc->flags |= AFS_FS_CURSOR_STOP; _leave(" = f [abort]"); return false; @@ -490,6 +559,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) case -ETIMEDOUT: case -ETIME: _debug("no conn"); + fc->error = error; goto iterate_address; } @@ -507,12 +577,65 @@ iterate_address: } /* + * Dump cursor state in the case of the error being EDESTADDRREQ. + */ +static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) +{ + static int count; + int i; + + if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) + return; + count++; + + rcu_read_lock(); + + pr_notice("EDESTADDR occurred\n"); + pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", + fc->cb_break, fc->cb_break_2, fc->flags, fc->error); + pr_notice("FC: ut=%lx ix=%d ni=%u\n", + fc->untried, fc->index, fc->nr_iterations); + + if (fc->server_list) { + const struct afs_server_list *sl = fc->server_list; + pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", + sl->nr_servers, sl->preferred, sl->vnovol_mask); + for (i = 0; i < sl->nr_servers; i++) { + const struct afs_server *s = sl->servers[i].server; + pr_notice("FC: server fl=%lx av=%u %pU\n", + s->flags, s->addr_version, &s->uuid); + if (s->addresses) { + const struct afs_addr_list *a = + rcu_dereference(s->addresses); + pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n", + a->version, + a->nr_ipv4, a->nr_addrs, a->max_addrs, + a->preferred); + pr_notice("FC: - pr=%lx R=%lx F=%lx\n", + a->probed, a->responded, a->failed); + if (a == fc->ac.alist) + pr_notice("FC: - current\n"); + } + } + } + + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error, + fc->ac.responded, fc->ac.nr_iterations); + rcu_read_unlock(); +} + +/* * Tidy up a filesystem cursor and unlock the vnode. */ int afs_end_vnode_operation(struct afs_fs_cursor *fc) { struct afs_net *net = afs_v2net(fc->vnode); - int ret; + + if (fc->error == -EDESTADDRREQ || + fc->error == -ENETUNREACH || + fc->error == -EHOSTUNREACH) + afs_dump_edestaddrreq(fc); mutex_unlock(&fc->vnode->io_lock); @@ -520,9 +643,8 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc) afs_put_cb_interest(net, fc->cbi); afs_put_serverlist(net, fc->server_list); - ret = fc->ac.error; - if (ret == -ECONNABORTED) - afs_abort_to_error(fc->ac.abort_code); + if (fc->error == -ECONNABORTED) + fc->error = afs_abort_to_error(fc->ac.abort_code); - return fc->ac.error; + return fc->error; } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c @@ -16,6 +16,7 @@ #include <net/af_rxrpc.h> #include "internal.h" #include "afs_cm.h" +#include "protocol_yfs.h" struct workqueue_struct *afs_async_calls; @@ -75,6 +76,18 @@ int afs_open_socket(struct afs_net *net) if (ret < 0) goto error_2; + srx.srx_service = YFS_CM_SERVICE; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + if (ret < 0) + goto error_2; + + /* Ideally, we'd turn on service upgrade here, but we can't because + * OpenAFS is buggy and leaks the userStatus field from packet to + * packet and between FS packets and CB packets - so if we try to do an + * upgrade on an FS packet, OpenAFS will leak that into the CB packet + * it sends back to us. + */ + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, afs_rx_discard_new_call); @@ -143,6 +156,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); spin_lock_init(&call->state_lock); + call->_iter = &call->iter; o = atomic_inc_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_alloc, 1, o, @@ -176,6 +190,7 @@ void afs_put_call(struct afs_call *call) afs_put_server(call->net, call->cm_server); afs_put_cb_interest(call->net, call->cbi); + afs_put_addrlist(call->alist); kfree(call->request); trace_afs_call(call, afs_call_trace_free, 0, o, @@ -189,21 +204,22 @@ void afs_put_call(struct afs_call *call) } /* - * Queue the call for actual work. Returns 0 unconditionally for convenience. + * Queue the call for actual work. */ -int afs_queue_call_work(struct afs_call *call) +static void afs_queue_call_work(struct afs_call *call) { - int u = atomic_inc_return(&call->usage); + if (call->type->work) { + int u = atomic_inc_return(&call->usage); - trace_afs_call(call, afs_call_trace_work, u, - atomic_read(&call->net->nr_outstanding_calls), - __builtin_return_address(0)); + trace_afs_call(call, afs_call_trace_work, u, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); - INIT_WORK(&call->work, call->type->work); + INIT_WORK(&call->work, call->type->work); - if (!queue_work(afs_wq, &call->work)) - afs_put_call(call); - return 0; + if (!queue_work(afs_wq, &call->work)) + afs_put_call(call); + } } /* @@ -233,6 +249,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net, goto nomem_free; } + afs_extract_to_buf(call, call->reply_max); call->operation_ID = type->op; init_waitqueue_head(&call->waitq); return call; @@ -286,7 +303,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, offset = 0; } - iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes); + iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes); } /* @@ -342,7 +359,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp, bool async) { - struct sockaddr_rxrpc *srx = ac->addr; + struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index]; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; @@ -359,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, atomic_read(&call->net->nr_outstanding_calls)); call->async = async; + call->addr_ix = ac->index; + call->alist = afs_get_addrlist(ac->alist); /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data @@ -390,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); + call->error = ret; goto error_kill_call; } @@ -401,8 +421,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, - call->request_size); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); @@ -432,7 +451,7 @@ error_do_abort: rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); } else { - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0); rxrpc_kernel_recv_data(call->net->socket, rxcall, &msg.msg_iter, false, &call->abort_code, &call->service_id); @@ -442,6 +461,8 @@ error_do_abort: call->error = ret; trace_afs_call_done(call); error_kill_call: + if (call->type->done) + call->type->done(call); afs_put_call(call); ac->error = ret; _leave(" = %d", ret); @@ -466,14 +487,12 @@ static void afs_deliver_to_call(struct afs_call *call) state == AFS_CALL_SV_AWAIT_ACK ) { if (state == AFS_CALL_SV_AWAIT_ACK) { - struct iov_iter iter; - - iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&call->iter, READ, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, - call->rxcall, &iter, false, - &remote_abort, + call->rxcall, &call->iter, + false, &remote_abort, &call->service_id); - trace_afs_recv_data(call, 0, 0, false, ret); + trace_afs_receive_data(call, &call->iter, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) return; @@ -485,10 +504,17 @@ static void afs_deliver_to_call(struct afs_call *call) return; } + if (call->want_reply_time && + rxrpc_kernel_get_reply_time(call->net->socket, + call->rxcall, + &call->reply_time)) + call->want_reply_time = false; + ret = call->type->deliver(call); state = READ_ONCE(call->state); switch (ret) { case 0: + afs_queue_call_work(call); if (state == AFS_CALL_CL_PROC_REPLY) { if (call->cbi) set_bit(AFS_SERVER_FL_MAY_HAVE_CB, @@ -500,7 +526,6 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; - case -EIO: case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); goto done; @@ -509,6 +534,10 @@ static void afs_deliver_to_call(struct afs_call *call) rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KIV"); goto local_abort; + case -EIO: + pr_err("kAFS: Call %u in bad state %u\n", + call->debug_id, state); + /* Fall through */ case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -517,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call) if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, -EBADMSG, "KUM"); + abort_code, ret, "KUM"); goto local_abort; } } done: + if (call->type->done) + call->type->done(call); if (state == AFS_CALL_COMPLETE && call->incoming) afs_put_call(call); out: @@ -728,6 +759,7 @@ void afs_charge_preallocation(struct work_struct *work) call->async = true; call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); + afs_extract_to_tmp(call); } if (rxrpc_kernel_charge_accept(net->socket, @@ -773,18 +805,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call) { int ret; - _enter("{%zu}", call->offset); - - ASSERTCMP(call->offset, <, 4); + _enter("{%zu}", iov_iter_count(call->_iter)); /* the operation ID forms the first four bytes of the request data */ - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->operation_ID = ntohl(call->tmp); afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST); - call->offset = 0; /* ask the cache manager to route the call (it'll change the call type * if successful) */ @@ -825,7 +854,7 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -864,7 +893,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -888,30 +917,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) /* * Extract a piece of data from the received data socket buffers. */ -int afs_extract_data(struct afs_call *call, void *buf, size_t count, - bool want_more) +int afs_extract_data(struct afs_call *call, bool want_more) { struct afs_net *net = call->net; - struct iov_iter iter; - struct kvec iov; + struct iov_iter *iter = call->_iter; enum afs_call_state state; u32 remote_abort = 0; int ret; - _enter("{%s,%zu},,%zu,%d", - call->type->name, call->offset, count, want_more); - - ASSERTCMP(call->offset, <=, count); - - iov.iov_base = buf + call->offset; - iov.iov_len = count - call->offset; - iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset); + _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more); - ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter, + ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, want_more, &remote_abort, &call->service_id); - call->offset += (count - call->offset) - iov_iter_count(&iter); - trace_afs_recv_data(call, count, call->offset, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; @@ -926,7 +944,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, break; case AFS_CALL_COMPLETE: kdebug("prem complete %d", call->error); - return -EIO; + return afs_io_error(call, afs_io_error_extract); default: break; } @@ -940,8 +958,9 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, /* * Log protocol error production. */ -noinline int afs_protocol_error(struct afs_call *call, int error) +noinline int afs_protocol_error(struct afs_call *call, int error, + enum afs_eproto_cause cause) { - trace_afs_protocol_error(call, error, __builtin_return_address(0)); + trace_afs_protocol_error(call, error, cause); return error; } diff --git a/fs/afs/security.c b/fs/afs/security.c @@ -126,7 +126,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, bool changed = false; int i, j; - _enter("{%x:%u},%x,%x", + _enter("{%llx:%llu},%x,%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access); rcu_read_lock(); @@ -147,7 +147,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, break; } - if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) { + if (afs_cb_is_broken(cb_break, vnode, + vnode->cb_interest)) { changed = true; break; } @@ -177,7 +178,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) + if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -256,7 +257,7 @@ found: spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) && + if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else @@ -289,7 +290,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, bool valid = false; int i, ret; - _enter("{%x:%u},%x", + _enter("{%llx:%llu},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); /* check the permits to see if we've got one yet */ @@ -349,7 +350,7 @@ int afs_permission(struct inode *inode, int mask) if (mask & MAY_NOT_BLOCK) return -ECHILD; - _enter("{{%x:%u},%lx},%x,", + _enter("{{%llx:%llu},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); key = afs_request_key(vnode->volume->cell); diff --git a/fs/afs/server.c b/fs/afs/server.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include "afs_fs.h" #include "internal.h" +#include "protocol_yfs.h" static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ @@ -230,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, rwlock_init(&server->fs_lock); INIT_HLIST_HEAD(&server->cb_volumes); rwlock_init(&server->cb_break_lock); + init_waitqueue_head(&server->probe_wq); + spin_lock_init(&server->probe_lock); afs_inc_servers_outstanding(net); _leave(" = %p", server); @@ -246,41 +249,23 @@ enomem: static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, struct key *key, const uuid_t *uuid) { - struct afs_addr_cursor ac; - struct afs_addr_list *alist; + struct afs_vl_cursor vc; + struct afs_addr_list *alist = NULL; int ret; - ret = afs_set_vl_cursor(&ac, cell); - if (ret < 0) - return ERR_PTR(ret); - - while (afs_iterate_addresses(&ac)) { - if (test_bit(ac.index, &ac.alist->yfs)) - alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); - else - alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); - switch (ac.error) { - case 0: - afs_end_cursor(&ac); - return alist; - case -ECONNABORTED: - ac.error = afs_abort_to_error(ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - break; - default: - ac.error = -EIO; - goto error; + ret = -ERESTARTSYS; + if (afs_begin_vlserver_operation(&vc, cell, key)) { + while (afs_select_vlserver(&vc)) { + if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) + alist = afs_yfsvl_get_endpoints(&vc, uuid); + else + alist = afs_vl_get_addrs_u(&vc, uuid); } + + ret = afs_end_vlserver_operation(&vc); } -error: - return ERR_PTR(afs_end_cursor(&ac)); + return ret < 0 ? ERR_PTR(ret) : alist; } /* @@ -382,9 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, - .start = alist->index, - .index = 0, - .addr = &alist->addrs[alist->index], + .index = alist->preferred, .error = 0, }; _enter("%p", server); @@ -392,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + wait_var_event(&server->probe_outstanding, + atomic_read(&server->probe_outstanding) == 0); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } @@ -525,99 +511,6 @@ void afs_purge_servers(struct afs_net *net) } /* - * Probe a fileserver to find its capabilities. - * - * TODO: Try service upgrade. - */ -static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) -{ - _enter(""); - - fc->ac.addr = NULL; - fc->ac.start = READ_ONCE(fc->ac.alist->index); - fc->ac.index = fc->ac.start; - fc->ac.error = 0; - fc->ac.begun = false; - - while (afs_iterate_addresses(&fc->ac)) { - afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server, - &fc->ac, fc->key); - switch (fc->ac.error) { - case 0: - afs_end_cursor(&fc->ac); - set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags); - return true; - case -ECONNABORTED: - fc->ac.error = afs_abort_to_error(fc->ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -ETIMEDOUT: - case -ETIME: - break; - default: - fc->ac.error = -EIO; - goto error; - } - } - -error: - afs_end_cursor(&fc->ac); - return false; -} - -/* - * If we haven't already, try probing the fileserver to get its capabilities. - * We try not to instigate parallel probes, but it's possible that the parallel - * probes will fail due to authentication failure when ours would succeed. - * - * TODO: Try sending an anonymous probe if an authenticated probe fails. - */ -bool afs_probe_fileserver(struct afs_fs_cursor *fc) -{ - bool success; - int ret, retries = 0; - - _enter(""); - -retry: - if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) { - _leave(" = t"); - return true; - } - - if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) { - success = afs_do_probe_fileserver(fc); - clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags); - wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING); - _leave(" = t"); - return success; - } - - _debug("wait"); - ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING, - TASK_INTERRUPTIBLE); - if (ret == -ERESTARTSYS) { - fc->ac.error = ret; - _leave(" = f [%d]", ret); - return false; - } - - retries++; - if (retries == 4) { - fc->ac.error = -ESTALE; - _leave(" = f [stale]"); - return false; - } - _debug("retry"); - goto retry; -} - -/* * Get an update for a server's address list. */ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c @@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new, return false; changed: - /* Maintain the same current server as before if possible. */ - cur = old->servers[old->index].server; + /* Maintain the same preferred server as before if possible. */ + cur = old->servers[old->preferred].server; for (j = 0; j < new->nr_servers; j++) { if (new->servers[j].server == cur) { - new->index = j; + new->preferred = j; break; } } diff --git a/fs/afs/super.c b/fs/afs/super.c @@ -406,10 +406,11 @@ static int afs_fill_super(struct super_block *sb, inode = afs_iget_pseudo_dir(sb, true); sb->s_flags |= SB_RDONLY; } else { - sprintf(sb->s_id, "%u", as->volume->vid); + sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); fid.vid = as->volume->vid; fid.vnode = 1; + fid.vnode_hi = 0; fid.unique = 1; inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); } @@ -663,7 +664,7 @@ static void afs_destroy_inode(struct inode *inode) { struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode); + _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode); _debug("DESTROY INODE %p", inode); diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c @@ -0,0 +1,340 @@ +/* AFS vlserver list management. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include "internal.h" + +struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, + unsigned short port) +{ + struct afs_vlserver *vlserver; + + vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), + GFP_KERNEL); + if (vlserver) { + atomic_set(&vlserver->usage, 1); + rwlock_init(&vlserver->lock); + init_waitqueue_head(&vlserver->probe_wq); + spin_lock_init(&vlserver->probe_lock); + vlserver->name_len = name_len; + vlserver->port = port; + memcpy(vlserver->name, name, name_len); + } + return vlserver; +} + +static void afs_vlserver_rcu(struct rcu_head *rcu) +{ + struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu); + + afs_put_addrlist(rcu_access_pointer(vlserver->addresses)); + kfree_rcu(vlserver, rcu); +} + +void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver) +{ + if (vlserver) { + unsigned int u = atomic_dec_return(&vlserver->usage); + //_debug("VL PUT %p{%u}", vlserver, u); + + if (u == 0) + call_rcu(&vlserver->rcu, afs_vlserver_rcu); + } +} + +struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) +{ + struct afs_vlserver_list *vllist; + + vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL); + if (vllist) { + atomic_set(&vllist->usage, 1); + rwlock_init(&vllist->lock); + } + + return vllist; +} + +void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist) +{ + if (vllist) { + unsigned int u = atomic_dec_return(&vllist->usage); + + //_debug("VLLS PUT %p{%u}", vllist, u); + if (u == 0) { + int i; + + for (i = 0; i < vllist->nr_servers; i++) { + afs_put_vlserver(net, vllist->servers[i].server); + } + kfree_rcu(vllist, rcu); + } + } +} + +static u16 afs_extract_le16(const u8 **_b) +{ + u16 val; + + val = (u16)*(*_b)++ << 0; + val |= (u16)*(*_b)++ << 8; + return val; +} + +/* + * Build a VL server address list from a DNS queried server list. + */ +static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, + u8 nr_addrs, u16 port) +{ + struct afs_addr_list *alist; + const u8 *b = *_b; + int ret = -EINVAL; + + alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port); + if (!alist) + return ERR_PTR(-ENOMEM); + if (nr_addrs == 0) + return alist; + + for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) { + struct dns_server_list_v1_address hdr; + __be32 x[4]; + + hdr.address_type = *b++; + + switch (hdr.address_type) { + case DNS_ADDRESS_IS_IPV4: + if (end - b < 4) { + _leave(" = -EINVAL [short inet]"); + goto error; + } + memcpy(x, b, 4); + afs_merge_fs_addr4(alist, x[0], port); + b += 4; + break; + + case DNS_ADDRESS_IS_IPV6: + if (end - b < 16) { + _leave(" = -EINVAL [short inet6]"); + goto error; + } + memcpy(x, b, 16); + afs_merge_fs_addr6(alist, x, port); + b += 16; + break; + + default: + _leave(" = -EADDRNOTAVAIL [unknown af %u]", + hdr.address_type); + ret = -EADDRNOTAVAIL; + goto error; + } + } + + /* Start with IPv6 if available. */ + if (alist->nr_ipv4 < alist->nr_addrs) + alist->preferred = alist->nr_ipv4; + + *_b = b; + return alist; + +error: + *_b = b; + afs_put_addrlist(alist); + return ERR_PTR(ret); +} + +/* + * Build a VL server list from a DNS queried server list. + */ +struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, + const void *buffer, + size_t buffer_size) +{ + const struct dns_server_list_v1_header *hdr = buffer; + struct dns_server_list_v1_server bs; + struct afs_vlserver_list *vllist, *previous; + struct afs_addr_list *addrs; + struct afs_vlserver *server; + const u8 *b = buffer, *end = buffer + buffer_size; + int ret = -ENOMEM, nr_servers, i, j; + + _enter(""); + + /* Check that it's a server list, v1 */ + if (end - b < sizeof(*hdr) || + hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST || + hdr->hdr.version != 1) { + pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n", + hdr->hdr.content, hdr->hdr.version, end - b); + ret = -EDESTADDRREQ; + goto dump; + } + + nr_servers = hdr->nr_servers; + + vllist = afs_alloc_vlserver_list(nr_servers); + if (!vllist) + return ERR_PTR(-ENOMEM); + + vllist->source = (hdr->source < NR__dns_record_source) ? + hdr->source : NR__dns_record_source; + vllist->status = (hdr->status < NR__dns_lookup_status) ? + hdr->status : NR__dns_lookup_status; + + read_lock(&cell->vl_servers_lock); + previous = afs_get_vlserverlist( + rcu_dereference_protected(cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock))); + read_unlock(&cell->vl_servers_lock); + + b += sizeof(*hdr); + while (end - b >= sizeof(bs)) { + bs.name_len = afs_extract_le16(&b); + bs.priority = afs_extract_le16(&b); + bs.weight = afs_extract_le16(&b); + bs.port = afs_extract_le16(&b); + bs.source = *b++; + bs.status = *b++; + bs.protocol = *b++; + bs.nr_addrs = *b++; + + _debug("extract %u %u %u %u %u %u %*.*s", + bs.name_len, bs.priority, bs.weight, + bs.port, bs.protocol, bs.nr_addrs, + bs.name_len, bs.name_len, b); + + if (end - b < bs.name_len) + break; + + ret = -EPROTONOSUPPORT; + if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) { + bs.protocol = DNS_SERVER_PROTOCOL_UDP; + } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) { + _leave(" = [proto %u]", bs.protocol); + goto error; + } + + if (bs.port == 0) + bs.port = AFS_VL_PORT; + if (bs.source > NR__dns_record_source) + bs.source = NR__dns_record_source; + if (bs.status > NR__dns_lookup_status) + bs.status = NR__dns_lookup_status; + + server = NULL; + if (previous) { + /* See if we can update an old server record */ + for (i = 0; i < previous->nr_servers; i++) { + struct afs_vlserver *p = previous->servers[i].server; + + if (p->name_len == bs.name_len && + p->port == bs.port && + strncasecmp(b, p->name, bs.name_len) == 0) { + server = afs_get_vlserver(p); + break; + } + } + } + + if (!server) { + ret = -ENOMEM; + server = afs_alloc_vlserver(b, bs.name_len, bs.port); + if (!server) + goto error; + } + + b += bs.name_len; + + /* Extract the addresses - note that we can't skip this as we + * have to advance the payload pointer. + */ + addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port); + if (IS_ERR(addrs)) { + ret = PTR_ERR(addrs); + goto error_2; + } + + if (vllist->nr_servers >= nr_servers) { + _debug("skip %u >= %u", vllist->nr_servers, nr_servers); + afs_put_addrlist(addrs); + afs_put_vlserver(cell->net, server); + continue; + } + + addrs->source = bs.source; + addrs->status = bs.status; + + if (addrs->nr_addrs == 0) { + afs_put_addrlist(addrs); + if (!rcu_access_pointer(server->addresses)) { + afs_put_vlserver(cell->net, server); + continue; + } + } else { + struct afs_addr_list *old = addrs; + + write_lock(&server->lock); + rcu_swap_protected(server->addresses, old, + lockdep_is_held(&server->lock)); + write_unlock(&server->lock); + afs_put_addrlist(old); + } + + + /* TODO: Might want to check for duplicates */ + + /* Insertion-sort by priority and weight */ + for (j = 0; j < vllist->nr_servers; j++) { + if (bs.priority < vllist->servers[j].priority) + break; /* Lower preferable */ + if (bs.priority == vllist->servers[j].priority && + bs.weight > vllist->servers[j].weight) + break; /* Higher preferable */ + } + + if (j < vllist->nr_servers) { + memmove(vllist->servers + j + 1, + vllist->servers + j, + (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); + } + + clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags); + + vllist->servers[j].priority = bs.priority; + vllist->servers[j].weight = bs.weight; + vllist->servers[j].server = server; + vllist->nr_servers++; + } + + if (b != end) { + _debug("parse error %zd", b - end); + goto error; + } + + afs_put_vlserverlist(cell->net, previous); + _leave(" = ok [%u]", vllist->nr_servers); + return vllist; + +error_2: + afs_put_vlserver(cell->net, server); +error: + afs_put_vlserverlist(cell->net, vllist); + afs_put_vlserverlist(cell->net, previous); +dump: + if (ret != -ENOMEM) { + printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer); + print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size); + } + return ERR_PTR(ret); +} diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c @@ -0,0 +1,273 @@ +/* AFS vlserver probing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + +static bool afs_vl_probe_done(struct afs_vlserver *server) +{ + if (!atomic_dec_and_test(&server->probe_outstanding)) + return false; + + wake_up_var(&server->probe_outstanding); + clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags); + wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING); + return true; +} + +/* + * Process the result of probing a vlserver. This is called after successful + * or failed delivery of an VL.GetCapabilities operation. + */ +void afs_vlserver_probe_result(struct afs_call *call) +{ + struct afs_addr_list *alist = call->alist; + struct afs_vlserver *server = call->reply[0]; + unsigned int server_index = (long)call->reply[1]; + unsigned int index = call->addr_ix; + unsigned int rtt = UINT_MAX; + bool have_result = false; + u64 _rtt; + int ret = call->error; + + _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code); + + spin_lock(&server->probe_lock); + + switch (ret) { + case 0: + server->probe.error = 0; + goto responded; + case -ECONNABORTED: + if (!server->probe.responded) { + server->probe.abort_code = call->abort_code; + server->probe.error = ret; + } + goto responded; + case -ENOMEM: + case -ENONET: + server->probe.local_failure = true; + afs_io_error(call, afs_io_error_vl_probe_fail); + goto out; + case -ECONNRESET: /* Responded, but call expired. */ + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + default: + clear_bit(index, &alist->responded); + set_bit(index, &alist->failed); + if (!server->probe.responded && + (server->probe.error == 0 || + server->probe.error == -ETIMEDOUT || + server->probe.error == -ETIME)) + server->probe.error = ret; + afs_io_error(call, afs_io_error_vl_probe_fail); + goto out; + } + +responded: + set_bit(index, &alist->responded); + clear_bit(index, &alist->failed); + + if (call->service_id == YFS_VL_SERVICE) { + server->probe.is_yfs = true; + set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } else { + server->probe.not_yfs = true; + if (!server->probe.is_yfs) { + clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } + } + + /* Get the RTT and scale it to fit into a 32-bit value that represents + * over a minute of time so that we can access it with one instruction + * on a 32-bit system. + */ + _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); + _rtt /= 64; + rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; + if (rtt < server->probe.rtt) { + server->probe.rtt = rtt; + alist->preferred = index; + have_result = true; + } + + smp_wmb(); /* Set rtt before responded. */ + server->probe.responded = true; + set_bit(AFS_VLSERVER_FL_PROBED, &server->flags); +out: + spin_unlock(&server->probe_lock); + + _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", + server_index, index, &alist->addrs[index].transport, + (unsigned int)rtt, ret); + + have_result |= afs_vl_probe_done(server); + if (have_result) { + server->probe.have_result = true; + wake_up_var(&server->probe.have_result); + wake_up_all(&server->probe_wq); + } +} + +/* + * Probe all of a vlserver's addresses to find out the best route and to + * query its capabilities. + */ +static int afs_do_probe_vlserver(struct afs_net *net, + struct afs_vlserver *server, + struct key *key, + unsigned int server_index) +{ + struct afs_addr_cursor ac = { + .index = 0, + }; + int ret; + + _enter("%s", server->name); + + read_lock(&server->lock); + ac.alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->lock)); + read_unlock(&server->lock); + + atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + memset(&server->probe, 0, sizeof(server->probe)); + server->probe.rtt = UINT_MAX; + + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { + ret = afs_vl_get_capabilities(net, &ac, key, server, + server_index, true); + if (ret != -EINPROGRESS) { + afs_vl_probe_done(server); + return ret; + } + } + + return 0; +} + +/* + * Send off probes to all unprobed servers. + */ +int afs_send_vl_probes(struct afs_net *net, struct key *key, + struct afs_vlserver_list *vllist) +{ + struct afs_vlserver *server; + int i, ret; + + for (i = 0; i < vllist->nr_servers; i++) { + server = vllist->servers[i].server; + if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags)) + continue; + + if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) { + ret = afs_do_probe_vlserver(net, server, key, i); + if (ret) + return ret; + } + } + + return 0; +} + +/* + * Wait for the first as-yet untried server to respond. + */ +int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, + unsigned long untried) +{ + struct wait_queue_entry *waits; + struct afs_vlserver *server; + unsigned int rtt = UINT_MAX; + bool have_responders = false; + int pref = -1, i; + + _enter("%u,%lx", vllist->nr_servers, untried); + + /* Only wait for servers that have a probe outstanding. */ + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) + __clear_bit(i, &untried); + if (server->probe.responded) + have_responders = true; + } + } + if (have_responders || !untried) + return 0; + + waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL); + if (!waits) + return -ENOMEM; + + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + init_waitqueue_entry(&waits[i], current); + add_wait_queue(&server->probe_wq, &waits[i]); + } + } + + for (;;) { + bool still_probing = false; + + set_current_state(TASK_INTERRUPTIBLE); + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (server->probe.responded) + goto stop; + if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) + still_probing = true; + } + } + + if (!still_probing || unlikely(signal_pending(current))) + goto stop; + schedule(); + } + +stop: + set_current_state(TASK_RUNNING); + + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (server->probe.responded && + server->probe.rtt < rtt) { + pref = i; + rtt = server->probe.rtt; + } + + remove_wait_queue(&server->probe_wq, &waits[i]); + } + } + + kfree(waits); + + if (pref == -1 && signal_pending(current)) + return -ERESTARTSYS; + + if (pref >= 0) + vllist->preferred = pref; + + _leave(" = 0 [%u]", pref); + return 0; +} diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c @@ -0,0 +1,355 @@ +/* Handle vlserver selection and rotation. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include "internal.h" +#include "afs_vl.h" + +/* + * Begin an operation on a volume location server. + */ +bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, + struct key *key) +{ + memset(vc, 0, sizeof(*vc)); + vc->cell = cell; + vc->key = key; + vc->error = -EDESTADDRREQ; + vc->ac.error = SHRT_MAX; + + if (signal_pending(current)) { + vc->error = -EINTR; + vc->flags |= AFS_VL_CURSOR_STOP; + return false; + } + + return true; +} + +/* + * Begin iteration through a server list, starting with the last used server if + * possible, or the last recorded good server if not. + */ +static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) +{ + struct afs_cell *cell = vc->cell; + + if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, + TASK_INTERRUPTIBLE)) { + vc->error = -ERESTARTSYS; + return false; + } + + read_lock(&cell->vl_servers_lock); + vc->server_list = afs_get_vlserverlist( + rcu_dereference_protected(cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock))); + read_unlock(&cell->vl_servers_lock); + if (!vc->server_list || !vc->server_list->nr_servers) + return false; + + vc->untried = (1UL << vc->server_list->nr_servers) - 1; + vc->index = -1; + return true; +} + +/* + * Select the vlserver to use. May be called multiple times to rotate + * through the vlservers. + */ +bool afs_select_vlserver(struct afs_vl_cursor *vc) +{ + struct afs_addr_list *alist; + struct afs_vlserver *vlserver; + u32 rtt; + int error = vc->ac.error, abort_code, i; + + _enter("%lx[%d],%lx[%d],%d,%d", + vc->untried, vc->index, + vc->ac.tried, vc->ac.index, + error, vc->ac.abort_code); + + if (vc->flags & AFS_VL_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + vc->nr_iterations++; + + /* Evaluate the result of the previous operation, if there was one. */ + switch (error) { + case SHRT_MAX: + goto start; + + default: + case 0: + /* Success or local failure. Stop. */ + vc->error = error; + vc->flags |= AFS_VL_CURSOR_STOP; + _leave(" = f [okay/local %d]", vc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (vc->ac.abort_code) { + case AFSVL_IO: + case AFSVL_BADVOLOPER: + case AFSVL_NOMEM: + /* The server went weird. */ + vc->error = -EREMOTEIO; + //write_lock(&vc->cell->vl_servers_lock); + //vc->server_list->weird_mask |= 1 << vc->index; + //write_unlock(&vc->cell->vl_servers_lock); + goto next_server; + + default: + vc->error = afs_abort_to_error(vc->ac.abort_code); + goto failed; + } + + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn %d", error); + vc->error = error; + goto iterate_address; + + case -ECONNRESET: + _debug("call reset"); + vc->error = error; + vc->flags |= AFS_VL_CURSOR_RETRY; + goto next_server; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(vc->cell->net, vc->server_list); + vc->server_list = NULL; + if (vc->flags & AFS_VL_CURSOR_RETRIED) + goto failed; + vc->flags |= AFS_VL_CURSOR_RETRIED; +start: + _debug("start"); + + if (!afs_start_vl_iteration(vc)) + goto failed; + + error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); + if (error < 0) + goto failed_set_error; + +pick_server: + _debug("pick [%lx]", vc->untried); + + error = afs_wait_for_vl_probes(vc->server_list, vc->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. */ + vc->index = vc->server_list->preferred; + if (test_bit(vc->index, &vc->untried)) + goto selected_server; + + vc->index = -1; + rtt = U32_MAX; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + + if (!test_bit(i, &vc->untried) || !s->probe.responded) + continue; + if (s->probe.rtt < rtt) { + vc->index = i; + rtt = s->probe.rtt; + } + } + + if (vc->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", vc->index); + __clear_bit(vc->index, &vc->untried); + + /* We're starting on a different vlserver from the list. We need to + * check it, find its address list and probe its capabilities before we + * use it. + */ + ASSERTCMP(vc->ac.alist, ==, NULL); + vlserver = vc->server_list->servers[vc->index].server; + vc->server = vlserver; + + _debug("USING VLSERVER: %s", vlserver->name); + + read_lock(&vlserver->lock); + alist = rcu_dereference_protected(vlserver->addresses, + lockdep_is_held(&vlserver->lock)); + afs_get_addrlist(alist); + read_unlock(&vlserver->lock); + + memset(&vc->ac, 0, sizeof(vc->ac)); + + if (!vc->ac.alist) + vc->ac.alist = alist; + else + afs_put_addrlist(alist); + + vc->ac.index = -1; + +iterate_address: + ASSERT(vc->ac.alist); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. + */ + if (!afs_iterate_addresses(&vc->ac)) + goto next_server; + + _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + + _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); + return true; + +next_server: + _debug("next"); + afs_end_cursor(&vc->ac); + goto pick_server; + +no_more_servers: + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (vc->flags & AFS_VL_CURSOR_RETRY) + goto restart_from_beginning; + + abort_code = 0; + error = -EDESTADDRREQ; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + int probe_error = READ_ONCE(s->probe.error); + + switch (probe_error) { + case 0: + continue; + default: + if (error == -ETIMEDOUT || + error == -ETIME) + continue; + case -ETIMEDOUT: + case -ETIME: + if (error == -ENOMEM || + error == -ENONET) + continue; + case -ENOMEM: + case -ENONET: + if (error == -ENETUNREACH) + continue; + case -ENETUNREACH: + if (error == -EHOSTUNREACH) + continue; + case -EHOSTUNREACH: + if (error == -ECONNREFUSED) + continue; + case -ECONNREFUSED: + if (error == -ECONNRESET) + continue; + case -ECONNRESET: /* Responded, but call expired. */ + if (error == -ECONNABORTED) + continue; + case -ECONNABORTED: + abort_code = s->probe.abort_code; + error = probe_error; + continue; + } + } + + if (error == -ECONNABORTED) + error = afs_abort_to_error(abort_code); + +failed_set_error: + vc->error = error; +failed: + vc->flags |= AFS_VL_CURSOR_STOP; + afs_end_cursor(&vc->ac); + _leave(" = f [failed %d]", vc->error); + return false; +} + +/* + * Dump cursor state in the case of the error being EDESTADDRREQ. + */ +static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) +{ + static int count; + int i; + + if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) + return; + count++; + + rcu_read_lock(); + pr_notice("EDESTADDR occurred\n"); + pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", + vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); + + if (vc->server_list) { + const struct afs_vlserver_list *sl = vc->server_list; + pr_notice("VC: SL nr=%u ix=%u\n", + sl->nr_servers, sl->index); + for (i = 0; i < sl->nr_servers; i++) { + const struct afs_vlserver *s = sl->servers[i].server; + pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", + s->name, s->port, s->flags, s->probe.error); + if (s->addresses) { + const struct afs_addr_list *a = + rcu_dereference(s->addresses); + pr_notice("VC: - nr=%u/%u/%u pf=%u\n", + a->nr_ipv4, a->nr_addrs, a->max_addrs, + a->preferred); + pr_notice("VC: - pr=%lx R=%lx F=%lx\n", + a->probed, a->responded, a->failed); + if (a == vc->ac.alist) + pr_notice("VC: - current\n"); + } + } + } + + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, + vc->ac.responded, vc->ac.nr_iterations); + rcu_read_unlock(); +} + +/* + * Tidy up a volume location server cursor and unlock the vnode. + */ +int afs_end_vlserver_operation(struct afs_vl_cursor *vc) +{ + struct afs_net *net = vc->cell->net; + + if (vc->error == -EDESTADDRREQ || + vc->error == -ENETUNREACH || + vc->error == -EHOSTUNREACH) + afs_vl_dump_edestaddrreq(vc); + + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(net, vc->server_list); + + if (vc->error == -ECONNABORTED) + vc->error = afs_abort_to_error(vc->ac.abort_code); + + return vc->error; +} diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c @@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = { * Dispatch a get volume entry by name or ID operation (uuid variant). If the * volname is a decimal number then it's a volume ID not a volume name. */ -struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, const char *volname, int volnamesz) { struct afs_vldb_entry *entry; struct afs_call *call; + struct afs_net *net = vc->cell->net; size_t reqsz, padsz; __be32 *bp; @@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, return ERR_PTR(-ENOMEM); } - call->key = key; + call->key = vc->key; call->reply[0] = entry; call->ret_reply0 = true; @@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, memset((void *)bp + volnamesz, 0, padsz); trace_afs_make_vl_call(call); - return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } /* @@ -187,19 +186,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) u32 uniquifier, nentries, count; int i, ret; - _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->_iter), call->count); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_buf(call, + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); call->unmarshall++; /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */ case 1: - ret = afs_extract_data(call, call->buffer, - sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -216,28 +214,28 @@ again: call->reply[0] = alist; call->count = count; call->count2 = nentries; - call->offset = 0; call->unmarshall++; + more_entries: + count = min(call->count, 4U); + afs_extract_to_buf(call, count * sizeof(__be32)); + /* Extract entries */ case 2: - count = min(call->count, 4U); - ret = afs_extract_data(call, call->buffer, - count * sizeof(__be32), - call->count > 4); + ret = afs_extract_data(call, call->count > 4); if (ret < 0) return ret; alist = call->reply[0]; bp = call->buffer; + count = min(call->count, 4U); for (i = 0; i < count; i++) if (alist->nr_addrs < call->count2) afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT); call->count -= count; if (call->count > 0) - goto again; - call->offset = 0; + goto more_entries; call->unmarshall++; break; } @@ -267,14 +265,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = { * Dispatch an operation to get the addresses for a server, where the server is * nominated by UUID. */ -struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, const uuid_t *uuid) { struct afs_ListAddrByAttributes__xdr *r; const struct afs_uuid *u = (const struct afs_uuid *)uuid; struct afs_call *call; + struct afs_net *net = vc->cell->net; __be32 *bp; int i; @@ -286,7 +283,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, if (!call) return ERR_PTR(-ENOMEM); - call->key = key; + call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; @@ -307,7 +304,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, r->uuid.node[i] = htonl(u->node[i]); trace_afs_make_vl_call(call); - return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } /* @@ -318,54 +315,51 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) u32 count; int ret; - _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->_iter), call->count); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* Extract the capabilities word count */ case 1: - ret = afs_extract_data(call, &call->tmp, - 1 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; count = ntohl(call->tmp); - call->count = count; call->count2 = count; - call->offset = 0; + call->unmarshall++; + afs_extract_discard(call, count * sizeof(__be32)); /* Extract capabilities words */ case 2: - count = min(call->count, 16U); - ret = afs_extract_data(call, call->buffer, - count * sizeof(__be32), - call->count > 16); + ret = afs_extract_data(call, false); if (ret < 0) return ret; /* TODO: Examine capabilities */ - call->count -= count; - if (call->count > 0) - goto again; - call->offset = 0; call->unmarshall++; break; } - call->reply[0] = (void *)(unsigned long)call->service_id; - _leave(" = 0 [done]"); return 0; } +static void afs_destroy_vl_get_capabilities(struct afs_call *call) +{ + struct afs_vlserver *server = call->reply[0]; + + afs_put_vlserver(call->net, server); + afs_flat_call_destructor(call); +} + /* * VL.GetCapabilities operation type */ @@ -373,11 +367,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { .name = "VL.GetCapabilities", .op = afs_VL_GetCapabilities, .deliver = afs_deliver_vl_get_capabilities, - .destructor = afs_flat_call_destructor, + .done = afs_vlserver_probe_result, + .destructor = afs_destroy_vl_get_capabilities, }; /* - * Probe a fileserver for the capabilities that it supports. This can + * Probe a volume server for the capabilities that it supports. This can * return up to 196 words. * * We use this to probe for service upgrade to determine what the server at the @@ -385,7 +380,10 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { */ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, - struct key *key) + struct key *key, + struct afs_vlserver *server, + unsigned int server_index, + bool async) { struct afs_call *call; __be32 *bp; @@ -397,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net, return -ENOMEM; call->key = key; - call->upgrade = true; /* Let's see if this is a YFS server */ - call->reply[0] = (void *)VLGETCAPABILITIES; - call->ret_reply0 = true; + call->reply[0] = afs_get_vlserver(server); + call->reply[1] = (void *)(long)server_index; + call->upgrade = true; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -407,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net, /* Can't take a ref on server */ trace_afs_make_vl_call(call); - return afs_make_call(ac, call, GFP_KERNEL, false); + return afs_make_call(ac, call, GFP_KERNEL, async); } /* @@ -426,22 +425,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) u32 uniquifier, size; int ret; - _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2); + _enter("{%u,%zu,%u}", + call->unmarshall, iov_iter_count(call->_iter), call->count2); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32)); call->unmarshall = 1; /* Extract the returned uuid, uniquifier, fsEndpoints count and * either the first fsEndpoint type or the volEndpoints * count if there are no fsEndpoints. */ case 1: - ret = afs_extract_data(call, call->buffer, - sizeof(uuid_t) + - 3 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -451,22 +447,19 @@ again: call->count2 = ntohl(*bp); /* Type or next count */ if (call->count > YFS_MAXENDPOINTS) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt_num); alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); if (!alist) return -ENOMEM; alist->version = uniquifier; call->reply[0] = alist; - call->offset = 0; if (call->count == 0) goto extract_volendpoints; - call->unmarshall = 2; - - /* Extract fsEndpoints[] entries */ - case 2: + next_fsendpoint: switch (call->count2) { case YFS_ENDPOINT_IPV4: size = sizeof(__be32) * (1 + 1 + 1); @@ -475,11 +468,17 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt_type); } size += sizeof(__be32); - ret = afs_extract_data(call, call->buffer, size, true); + afs_extract_to_buf(call, size); + call->unmarshall = 2; + + /* Extract fsEndpoints[] entries */ + case 2: + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -488,18 +487,21 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt4_len); afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt6_len); afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); bp += 6; break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt_type); } /* Got either the type of the next entry or the count of @@ -507,10 +509,9 @@ again: */ call->count2 = ntohl(*bp++); - call->offset = 0; call->count--; if (call->count > 0) - goto again; + goto next_fsendpoint; extract_volendpoints: /* Extract the list of volEndpoints. */ @@ -518,8 +519,10 @@ again: if (!call->count) goto end; if (call->count > YFS_MAXENDPOINTS) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt_type); + afs_extract_to_buf(call, 1 * sizeof(__be32)); call->unmarshall = 3; /* Extract the type of volEndpoints[0]. Normally we would @@ -527,17 +530,14 @@ again: * data of the current one, but this is the first... */ case 3: - ret = afs_extract_data(call, call->buffer, sizeof(__be32), true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; bp = call->buffer; - call->count2 = ntohl(*bp++); - call->offset = 0; - call->unmarshall = 4; - /* Extract volEndpoints[] entries */ - case 4: + next_volendpoint: + call->count2 = ntohl(*bp++); switch (call->count2) { case YFS_ENDPOINT_IPV4: size = sizeof(__be32) * (1 + 1 + 1); @@ -546,12 +546,18 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt_type); } if (call->count > 1) - size += sizeof(__be32); - ret = afs_extract_data(call, call->buffer, size, true); + size += sizeof(__be32); /* Get next type too */ + afs_extract_to_buf(call, size); + call->unmarshall = 4; + + /* Extract volEndpoints[] entries */ + case 4: + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -559,34 +565,35 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt4_len); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt6_len); bp += 6; break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt_type); } /* Got either the type of the next entry or the count of * volEndpoints if no more fsEndpoints. */ - call->offset = 0; call->count--; - if (call->count > 0) { - call->count2 = ntohl(*bp++); - goto again; - } + if (call->count > 0) + goto next_volendpoint; end: + afs_extract_discard(call, 0); call->unmarshall = 5; /* Done */ case 5: - ret = afs_extract_data(call, call->buffer, 0, false); + ret = afs_extract_data(call, false); if (ret < 0) return ret; call->unmarshall = 6; @@ -596,11 +603,6 @@ again: } alist = call->reply[0]; - - /* Start with IPv6 if available. */ - if (alist->nr_ipv4 < alist->nr_addrs) - alist->index = alist->nr_ipv4; - _leave(" = 0 [done]"); return 0; } @@ -619,12 +621,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = { * Dispatch an operation to get the addresses for a server, where the server is * nominated by UUID. */ -struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, const uuid_t *uuid) { struct afs_call *call; + struct afs_net *net = vc->cell->net; __be32 *bp; _enter(""); @@ -635,7 +636,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, if (!call) return ERR_PTR(-ENOMEM); - call->key = key; + call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; @@ -646,5 +647,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ trace_afs_make_vl_call(call); - return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c @@ -74,55 +74,19 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, const char *volname, size_t volnamesz) { - struct afs_addr_cursor ac; - struct afs_vldb_entry *vldb; + struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); + struct afs_vl_cursor vc; int ret; - ret = afs_set_vl_cursor(&ac, cell); - if (ret < 0) - return ERR_PTR(ret); - - while (afs_iterate_addresses(&ac)) { - if (!test_bit(ac.index, &ac.alist->probed)) { - ret = afs_vl_get_capabilities(cell->net, &ac, key); - switch (ret) { - case VL_SERVICE: - clear_bit(ac.index, &ac.alist->yfs); - set_bit(ac.index, &ac.alist->probed); - ac.addr->srx_service = ret; - break; - case YFS_VL_SERVICE: - set_bit(ac.index, &ac.alist->yfs); - set_bit(ac.index, &ac.alist->probed); - ac.addr->srx_service = ret; - break; - } - } - - vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, - volname, volnamesz); - switch (ac.error) { - case 0: - afs_end_cursor(&ac); - return vldb; - case -ECONNABORTED: - ac.error = afs_abort_to_error(ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - break; - default: - ac.error = -EIO; - goto error; - } + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); + + while (afs_select_vlserver(&vc)) { + vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); } -error: - return ERR_PTR(afs_end_cursor(&ac)); + ret = afs_end_vlserver_operation(&vc); + return ret < 0 ? ERR_PTR(ret) : vldb; } /* @@ -270,7 +234,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%u", volume->vid); + idsz = sprintf(idbuf, "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { diff --git a/fs/afs/write.c b/fs/afs/write.c @@ -33,10 +33,21 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, loff_t pos, unsigned int len, struct page *page) { struct afs_read *req; + size_t p; + void *data; int ret; _enter(",,%llu", (unsigned long long)pos); + if (pos >= vnode->vfs_inode.i_size) { + p = pos & ~PAGE_MASK; + ASSERTCMP(p + len, <=, PAGE_SIZE); + data = kmap(page); + memset(data + p, 0, len); + kunmap(page); + return 0; + } + req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), GFP_KERNEL); if (!req) @@ -81,7 +92,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; int ret; - _enter("{%x:%u},{%lx},%u,%u", + _enter("{%llx:%llu},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); /* We want to store information about how much of a page is altered in @@ -181,7 +192,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, loff_t i_size, maybe_i_size; int ret; - _enter("{%x:%u},{%lx}", + _enter("{%llx:%llu},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); maybe_i_size = pos + copied; @@ -230,7 +241,7 @@ static void afs_kill_pages(struct address_space *mapping, struct pagevec pv; unsigned count, loop; - _enter("{%x:%u},%lx-%lx", + _enter("{%llx:%llu},%lx-%lx", vnode->fid.vid, vnode->fid.vnode, first, last); pagevec_init(&pv); @@ -272,7 +283,7 @@ static void afs_redirty_pages(struct writeback_control *wbc, struct pagevec pv; unsigned count, loop; - _enter("{%x:%u},%lx-%lx", + _enter("{%llx:%llu},%lx-%lx", vnode->fid.vid, vnode->fid.vnode, first, last); pagevec_init(&pv); @@ -314,7 +325,7 @@ static int afs_store_data(struct address_space *mapping, struct list_head *p; int ret = -ENOKEY, ret2; - _enter("%s{%x:%u.%u},%lx,%lx,%x,%x", + _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -533,6 +544,7 @@ no_more: case -ENOENT: case -ENOMEDIUM: case -ENXIO: + trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail); afs_kill_pages(mapping, first, last); mapping_set_error(mapping, ret); break; @@ -675,7 +687,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) unsigned count, loop; pgoff_t first = call->first, last = call->last; - _enter("{%x:%u},{%lx-%lx}", + _enter("{%llx:%llu},{%lx-%lx}", vnode->fid.vid, vnode->fid.vnode, first, last); pagevec_init(&pv); @@ -714,7 +726,7 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) ssize_t result; size_t count = iov_iter_count(from); - _enter("{%x.%u},{%zu},", + _enter("{%llx:%llu},{%zu},", vnode->fid.vid, vnode->fid.vnode, count); if (IS_SWAPFILE(&vnode->vfs_inode)) { @@ -742,7 +754,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("{%x:%u},{n=%pD},%d", + _enter("{%llx:%llu},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); @@ -760,7 +772,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) struct afs_vnode *vnode = AFS_FS_I(inode); unsigned long priv; - _enter("{{%x:%u}},{%lx}", + _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, vmf->page->index); sb_start_pagefault(inode->i_sb); diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c @@ -72,7 +72,7 @@ static int afs_xattr_get_fid(const struct xattr_handler *handler, char text[8 + 1 + 8 + 1 + 8 + 1]; size_t len; - len = sprintf(text, "%x:%x:%x", + len = sprintf(text, "%llx:%llx:%x", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); if (size == 0) return len; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c @@ -0,0 +1,2184 @@ +/* YFS File Server client stubs + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/circ_buf.h> +#include <linux/iversion.h> +#include "internal.h" +#include "afs_fs.h" +#include "xdr_fs.h" +#include "protocol_yfs.h" + +static const struct afs_fid afs_zero_fid; + +static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) +{ + call->cbi = afs_get_cb_interest(cbi); +} + +#define xdr_size(x) (sizeof(*x) / sizeof(__be32)) + +static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid) +{ + const struct yfs_xdr_YFSFid *x = (const void *)*_bp; + + fid->vid = xdr_to_u64(x->volume); + fid->vnode = xdr_to_u64(x->vnode.lo); + fid->vnode_hi = ntohl(x->vnode.hi); + fid->unique = ntohl(x->vnode.unique); + *_bp += xdr_size(x); +} + +static __be32 *xdr_encode_u32(__be32 *bp, u32 n) +{ + *bp++ = htonl(n); + return bp; +} + +static __be32 *xdr_encode_u64(__be32 *bp, u64 n) +{ + struct yfs_xdr_u64 *x = (void *)bp; + + *x = u64_to_xdr(n); + return bp + xdr_size(x); +} + +static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid) +{ + struct yfs_xdr_YFSFid *x = (void *)bp; + + x->volume = u64_to_xdr(fid->vid); + x->vnode.lo = u64_to_xdr(fid->vnode); + x->vnode.hi = htonl(fid->vnode_hi); + x->vnode.unique = htonl(fid->unique); + return bp + xdr_size(x); +} + +static size_t xdr_strlen(unsigned int len) +{ + return sizeof(__be32) + round_up(len, sizeof(__be32)); +} + +static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len) +{ + bp = xdr_encode_u32(bp, len); + bp = memcpy(bp, p, len); + if (len & 3) { + unsigned int pad = 4 - (len & 3); + + memset((u8 *)bp + len, 0, pad); + len += pad; + } + + return bp + len / sizeof(__be32); +} + +static s64 linux_to_yfs_time(const struct timespec64 *t) +{ + /* Convert to 100ns intervals. */ + return (u64)t->tv_sec * 10000000 + t->tv_nsec/100; +} + +static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + + x->mask = htonl(AFS_SET_MODE); + x->mode = htonl(mode & S_IALLUGO); + x->mtime_client = u64_to_xdr(0); + x->owner = u64_to_xdr(0); + x->group = u64_to_xdr(0); + return bp + xdr_size(x); +} + +static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + s64 mtime = linux_to_yfs_time(t); + + x->mask = htonl(AFS_SET_MTIME); + x->mode = htonl(0); + x->mtime_client = u64_to_xdr(mtime); + x->owner = u64_to_xdr(0); + x->group = u64_to_xdr(0); + return bp + xdr_size(x); +} + +/* + * Convert a signed 100ns-resolution 64-bit time into a timespec. + */ +static struct timespec64 yfs_time_to_linux(s64 t) +{ + struct timespec64 ts; + u64 abs_t; + + /* + * Unfortunately can not use normal 64 bit division on 32 bit arch, but + * the alternative, do_div, does not work with negative numbers so have + * to special case them + */ + if (t < 0) { + abs_t = -t; + ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100); + ts.tv_nsec = -ts.tv_nsec; + ts.tv_sec = -abs_t; + } else { + abs_t = t; + ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100; + ts.tv_sec = abs_t; + } + + return ts; +} + +static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr) +{ + s64 t = xdr_to_u64(xdr); + + return yfs_time_to_linux(t); +} + +static void yfs_check_req(struct afs_call *call, __be32 *bp) +{ + size_t len = (void *)bp - call->request; + + if (len > call->request_size) + pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n", + call->type->name, len, call->request_size); + else if (len < call->request_size) + pr_warning("kAFS: %s: Request buffer underflow (%zu<%u)\n", + call->type->name, len, call->request_size); +} + +/* + * Dump a bad file status record. + */ +static void xdr_dump_bad(const __be32 *bp) +{ + __be32 x[4]; + int i; + + pr_notice("YFS XDR: Bad status record\n"); + for (i = 0; i < 5 * 4 * 4; i += 16) { + memcpy(x, bp, 16); + bp += 4; + pr_notice("%03x: %08x %08x %08x %08x\n", + i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); + } + + memcpy(x, bp, 4); + pr_notice("0x50: %08x\n", ntohl(x[0])); +} + +/* + * Decode a YFSFetchStatus block + */ +static int xdr_decode_YFSFetchStatus(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; + u32 type; + u8 flags = 0; + + status->abort_code = ntohl(xdr->abort_code); + if (status->abort_code != 0) { + if (vnode && status->abort_code == VNOVNODE) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + status->nlink = 0; + __afs_break_callback(vnode); + } + return 0; + } + + type = ntohl(xdr->type); + switch (type) { + case AFS_FTYPE_FILE: + case AFS_FTYPE_DIR: + case AFS_FTYPE_SYMLINK: + if (type != status->type && + vnode && + !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { + pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + status->type, type); + goto bad; + } + status->type = type; + break; + default: + goto bad; + } + +#define EXTRACT_M4(FIELD) \ + do { \ + u32 x = ntohl(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_META_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) + +#define EXTRACT_M8(FIELD) \ + do { \ + u64 x = xdr_to_u64(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_META_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) + +#define EXTRACT_D8(FIELD) \ + do { \ + u64 x = xdr_to_u64(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_DATA_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) + + EXTRACT_M4(nlink); + EXTRACT_D8(size); + EXTRACT_D8(data_version); + EXTRACT_M8(author); + EXTRACT_M8(owner); + EXTRACT_M8(group); + EXTRACT_M4(mode); + EXTRACT_M4(caller_access); /* call ticket dependent */ + EXTRACT_M4(anon_access); + + status->mtime_client = xdr_to_time(xdr->mtime_client); + status->mtime_server = xdr_to_time(xdr->mtime_server); + status->lock_count = ntohl(xdr->lock_count); + + if (read_req) { + read_req->data_version = status->data_version; + read_req->file_size = status->size; + } + + *_bp += xdr_size(xdr); + + if (vnode) { + if (test_bit(AFS_VNODE_UNSET, &vnode->flags)) + flags |= AFS_VNODE_NOT_YET_SET; + afs_update_inode_from_status(vnode, status, expected_version, + flags); + } + + return 0; + +bad: + xdr_dump_bad(*_bp); + return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); +} + +/* + * Decode the file status. We need to lock the target vnode if we're going to + * update its status so that stat() sees the attributes update atomically. + */ +static int yfs_decode_status(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + int ret; + + if (!vnode) + return xdr_decode_YFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + + write_seqlock(&vnode->cb_lock); + ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + write_sequnlock(&vnode->cb_lock); + return ret; +} + +/* + * Decode a YFSCallBack block + */ +static void xdr_decode_YFSCallBack(struct afs_call *call, + struct afs_vnode *vnode, + const __be32 **_bp) +{ + struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp; + struct afs_cb_interest *old, *cbi = call->cbi; + u64 cb_expiry; + + write_seqlock(&vnode->cb_lock); + + if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { + cb_expiry = xdr_to_u64(xdr->expiration_time); + do_div(cb_expiry, 10 * 1000 * 1000); + vnode->cb_version = ntohl(xdr->version); + vnode->cb_type = ntohl(xdr->type); + vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + old = vnode->cb_interest; + if (old != call->cbi) { + vnode->cb_interest = cbi; + cbi = old; + } + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } + + write_sequnlock(&vnode->cb_lock); + call->cbi = cbi; + *_bp += xdr_size(xdr); +} + +static void xdr_decode_YFSCallBack_raw(const __be32 **_bp, + struct afs_callback *cb) +{ + struct yfs_xdr_YFSCallBack *x = (void *)*_bp; + u64 cb_expiry; + + cb_expiry = xdr_to_u64(x->expiration_time); + do_div(cb_expiry, 10 * 1000 * 1000); + cb->version = ntohl(x->version); + cb->type = ntohl(x->type); + cb->expires_at = cb_expiry + ktime_get_real_seconds(); + + *_bp += xdr_size(x); +} + +/* + * Decode a YFSVolSync block + */ +static void xdr_decode_YFSVolSync(const __be32 **_bp, + struct afs_volsync *volsync) +{ + struct yfs_xdr_YFSVolSync *x = (void *)*_bp; + u64 creation; + + if (volsync) { + creation = xdr_to_u64(x->vol_creation_date); + do_div(creation, 10 * 1000 * 1000); + volsync->creation = creation; + } + + *_bp += xdr_size(x); +} + +/* + * Encode the requested attributes into a YFSStoreStatus block + */ +static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + s64 mtime = 0, owner = 0, group = 0; + u32 mask = 0, mode = 0; + + mask = 0; + if (attr->ia_valid & ATTR_MTIME) { + mask |= AFS_SET_MTIME; + mtime = linux_to_yfs_time(&attr->ia_mtime); + } + + if (attr->ia_valid & ATTR_UID) { + mask |= AFS_SET_OWNER; + owner = from_kuid(&init_user_ns, attr->ia_uid); + } + + if (attr->ia_valid & ATTR_GID) { + mask |= AFS_SET_GROUP; + group = from_kgid(&init_user_ns, attr->ia_gid); + } + + if (attr->ia_valid & ATTR_MODE) { + mask |= AFS_SET_MODE; + mode = attr->ia_mode & S_IALLUGO; + } + + x->mask = htonl(mask); + x->mode = htonl(mode); + x->mtime_client = u64_to_xdr(mtime); + x->owner = u64_to_xdr(owner); + x->group = u64_to_xdr(group); + return bp + xdr_size(x); +} + +/* + * Decode a YFSFetchVolumeStatus block. + */ +static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp, + struct afs_volume_status *vs) +{ + const struct yfs_xdr_YFSFetchVolumeStatus *x = (const void *)*_bp; + u32 flags; + + vs->vid = xdr_to_u64(x->vid); + vs->parent_id = xdr_to_u64(x->parent_id); + flags = ntohl(x->flags); + vs->online = flags & yfs_FVSOnline; + vs->in_service = flags & yfs_FVSInservice; + vs->blessed = flags & yfs_FVSBlessed; + vs->needs_salvage = flags & yfs_FVSNeedsSalvage; + vs->type = ntohl(x->type); + vs->min_quota = 0; + vs->max_quota = xdr_to_u64(x->max_quota); + vs->blocks_in_use = xdr_to_u64(x->blocks_in_use); + vs->part_blocks_avail = xdr_to_u64(x->part_blocks_avail); + vs->part_max_blocks = xdr_to_u64(x->part_max_blocks); + vs->vol_copy_date = xdr_to_u64(x->vol_copy_date); + vs->vol_backup_date = xdr_to_u64(x->vol_backup_date); + *_bp += sizeof(*x) / sizeof(__be32); +} + +/* + * deliver reply data to an FS.FetchStatus + */ +static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack(call, vnode, &bp); + xdr_decode_YFSVolSync(&bp, call->reply[1]); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.FetchStatus operation type + */ +static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = { + .name = "YFS.FetchStatus(vnode)", + .op = yfs_FS_FetchStatus, + .deliver = yfs_deliver_fs_fetch_status_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a file. + */ +int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync, + bool new_inode) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus_vnode, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = volsync; + call->expected_version = new_inode ? 1 : vnode->status.data_version; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an YFS.FetchData64. + */ +static int yfs_deliver_fs_fetch_data64(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + struct afs_read *req = call->reply[2]; + const __be32 *bp; + unsigned int size; + int ret; + + _enter("{%u,%zu/%llu}", + call->unmarshall, iov_iter_count(&call->iter), req->actual_len); + + switch (call->unmarshall) { + case 0: + req->actual_len = 0; + req->index = 0; + req->offset = req->pos & (PAGE_SIZE - 1); + afs_extract_to_tmp64(call); + call->unmarshall++; + + /* extract the returned data length */ + case 1: + _debug("extract data length"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + req->actual_len = be64_to_cpu(call->tmp64); + _debug("DATA length: %llu", req->actual_len); + req->remain = min(req->len, req->actual_len); + if (req->remain == 0) + goto no_more_data; + + call->unmarshall++; + + begin_page: + ASSERTCMP(req->index, <, req->nr_pages); + if (req->remain > PAGE_SIZE - req->offset) + size = PAGE_SIZE - req->offset; + else + size = req->remain; + call->bvec[0].bv_len = size; + call->bvec[0].bv_offset = req->offset; + call->bvec[0].bv_page = req->pages[req->index]; + iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); + ASSERTCMP(size, <=, PAGE_SIZE); + + /* extract the returned data */ + case 2: + _debug("extract data %zu/%llu", + iov_iter_count(&call->iter), req->remain); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + req->remain -= call->bvec[0].bv_len; + req->offset += call->bvec[0].bv_len; + ASSERTCMP(req->offset, <=, PAGE_SIZE); + if (req->offset == PAGE_SIZE) { + req->offset = 0; + if (req->page_done) + req->page_done(call, req); + req->index++; + if (req->remain > 0) + goto begin_page; + } + + ASSERTCMP(req->remain, ==, 0); + if (req->actual_len <= req->len) + goto no_more_data; + + /* Discard any excess data the server gave us */ + iov_iter_discard(&call->iter, READ, req->actual_len - req->len); + call->unmarshall = 3; + case 3: + _debug("extract discard %zu/%llu", + iov_iter_count(&call->iter), req->actual_len - req->len); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + no_more_data: + call->unmarshall = 4; + afs_extract_to_buf(call, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + /* extract the metadata */ + case 4: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack(call, vnode, &bp); + xdr_decode_YFSVolSync(&bp, call->reply[1]); + + call->unmarshall++; + + case 5: + break; + } + + for (; req->index < req->nr_pages; req->index++) { + if (req->offset < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + req->offset, PAGE_SIZE); + if (req->page_done) + req->page_done(call, req); + req->offset = 0; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void yfs_fetch_data_destructor(struct afs_call *call) +{ + struct afs_read *req = call->reply[2]; + + afs_put_read(req); + afs_flat_call_destructor(call); +} + +/* + * YFS.FetchData64 operation type + */ +static const struct afs_call_type yfs_RXYFSFetchData64 = { + .name = "YFS.FetchData64", + .op = yfs_FS_FetchData64, + .deliver = yfs_deliver_fs_fetch_data64, + .destructor = yfs_fetch_data_destructor, +}; + +/* + * Fetch data from a file. + */ +int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(",%x,{%llx:%llu},%llx,%llx", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode, + req->pos, req->len); + + call = afs_alloc_flat_call(net, &yfs_RXYFSFetchData64, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_u64) * 2, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = NULL; /* volsync */ + call->reply[2] = req; + call->expected_version = vnode->status.data_version; + call->want_reply_time = true; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_u64(bp, req->pos); + bp = xdr_encode_u64(bp, req->len); + yfs_check_req(call, bp); + + refcount_inc(&req->usage); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data for YFS.CreateFile or YFS.MakeDir. + */ +static int yfs_deliver_fs_create_vnode(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFid(&bp, call->reply[1]); + ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack_raw(&bp, call->reply[3]); + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.CreateFile and FS.MakeDir operation type + */ +static const struct afs_call_type afs_RXFSCreateFile = { + .name = "YFS.CreateFile", + .op = yfs_FS_CreateFile, + .deliver = yfs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a file. + */ +int yfs_fs_create_file(struct afs_fs_cursor *fc, + const char *name, + umode_t mode, + u64 current_data_version, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct afs_callback *newcb) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + size_t namesz, reqsz, rplsz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + reqsz = (sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(__be32)); + rplsz = (sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + call = afs_alloc_flat_call(net, &afs_RXFSCreateFile, reqsz, rplsz); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->reply[3] = newcb; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSCREATEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSStoreStatus_mode(bp, mode); + bp = xdr_encode_u32(bp, 0); /* ViceLockType */ + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +static const struct afs_call_type yfs_RXFSMakeDir = { + .name = "YFS.MakeDir", + .op = yfs_FS_MakeDir, + .deliver = yfs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Make a directory. + */ +int yfs_fs_make_dir(struct afs_fs_cursor *fc, + const char *name, + umode_t mode, + u64 current_data_version, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct afs_callback *newcb) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + size_t namesz, reqsz, rplsz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + reqsz = (sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + sizeof(struct yfs_xdr_YFSStoreStatus)); + rplsz = (sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + call = afs_alloc_flat_call(net, &yfs_RXFSMakeDir, reqsz, rplsz); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->reply[3] = newcb; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSMAKEDIR); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSStoreStatus_mode(bp, mode); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.RemoveFile2 operation. + */ +static int yfs_deliver_fs_remove_file2(struct afs_call *call) +{ + struct afs_vnode *dvnode = call->reply[0]; + struct afs_vnode *vnode = call->reply[1]; + struct afs_fid fid; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + + xdr_decode_YFSFid(&bp, &fid); + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + if (ret < 0) + return ret; + /* Was deleted if vnode->status.abort_code == VNOVNODE. */ + + xdr_decode_YFSVolSync(&bp, NULL); + return 0; +} + +/* + * YFS.RemoveFile2 operation type. + */ +static const struct afs_call_type yfs_RXYFSRemoveFile2 = { + .name = "YFS.RemoveFile2", + .op = yfs_FS_RemoveFile2, + .deliver = yfs_deliver_fs_remove_file2, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a file and retrieve new file status. + */ +int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, u64 current_data_version) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(dvnode); + size_t namesz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + + call = afs_alloc_flat_call(net, &yfs_RXYFSRemoveFile2, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSREMOVEFILE2); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation. + */ +static int yfs_deliver_fs_remove(struct afs_call *call) +{ + struct afs_vnode *dvnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + + xdr_decode_YFSVolSync(&bp, NULL); + return 0; +} + +/* + * FS.RemoveDir and FS.RemoveFile operation types. + */ +static const struct afs_call_type yfs_RXYFSRemoveFile = { + .name = "YFS.RemoveFile", + .op = yfs_FS_RemoveFile, + .deliver = yfs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type yfs_RXYFSRemoveDir = { + .name = "YFS.RemoveDir", + .op = yfs_FS_RemoveDir, + .deliver = yfs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +/* + * remove a file or directory + */ +int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, bool isdir, u64 current_data_version) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(dvnode); + size_t namesz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + call = afs_alloc_flat_call( + net, isdir ? &yfs_RXYFSRemoveDir : &yfs_RXYFSRemoveFile, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, isdir ? YFSREMOVEDIR : YFSREMOVEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.Link operation. + */ +static int yfs_deliver_fs_link(struct afs_call *call) +{ + struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + if (ret < 0) + return ret; + ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Link operation type. + */ +static const struct afs_call_type yfs_RXYFSLink = { + .name = "YFS.Link", + .op = yfs_FS_Link, + .deliver = yfs_deliver_fs_link, + .destructor = afs_flat_call_destructor, +}; + +/* + * Make a hard link. + */ +int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, u64 current_data_version) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + size_t namesz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + call = afs_alloc_flat_call(net, &yfs_RXYFSLink, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSLINK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.Symlink operation. + */ +static int yfs_deliver_fs_symlink(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFid(&bp, call->reply[1]); + ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Symlink operation type + */ +static const struct afs_call_type yfs_RXYFSSymlink = { + .name = "YFS.Symlink", + .op = yfs_FS_Symlink, + .deliver = yfs_deliver_fs_symlink, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a symbolic link. + */ +int yfs_fs_symlink(struct afs_fs_cursor *fc, + const char *name, + const char *contents, + u64 current_data_version, + struct afs_fid *newfid, + struct afs_file_status *newstatus) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(dvnode); + size_t namesz, contents_sz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + contents_sz = strlen(contents); + call = afs_alloc_flat_call(net, &yfs_RXYFSSymlink, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + xdr_strlen(contents_sz) + + sizeof(struct yfs_xdr_YFSStoreStatus), + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSYMLINK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_string(bp, contents, contents_sz); + bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.Rename operation. + */ +static int yfs_deliver_fs_rename(struct afs_call *call) +{ + struct afs_vnode *orig_dvnode = call->reply[0]; + struct afs_vnode *new_dvnode = call->reply[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + if (new_dvnode != orig_dvnode) { + ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL); + if (ret < 0) + return ret; + } + + xdr_decode_YFSVolSync(&bp, NULL); + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Rename operation type + */ +static const struct afs_call_type yfs_RXYFSRename = { + .name = "FS.Rename", + .op = yfs_FS_Rename, + .deliver = yfs_deliver_fs_rename, + .destructor = afs_flat_call_destructor, +}; + +/* + * Rename a file or directory. + */ +int yfs_fs_rename(struct afs_fs_cursor *fc, + const char *orig_name, + struct afs_vnode *new_dvnode, + const char *new_name, + u64 current_orig_data_version, + u64 current_new_data_version) +{ + struct afs_vnode *orig_dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(orig_dvnode); + size_t o_namesz, n_namesz; + __be32 *bp; + + _enter(""); + + o_namesz = strlen(orig_name); + n_namesz = strlen(new_name); + call = afs_alloc_flat_call(net, &yfs_RXYFSRename, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(o_namesz) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(n_namesz), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = orig_dvnode; + call->reply[1] = new_dvnode; + call->expected_version = current_orig_data_version + 1; + call->expected_version_2 = current_new_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSRENAME); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &orig_dvnode->fid); + bp = xdr_encode_string(bp, orig_name, o_namesz); + bp = xdr_encode_YFSFid(bp, &new_dvnode->fid); + bp = xdr_encode_string(bp, new_name, n_namesz); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &orig_dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.StoreData64 operation. + */ +static int yfs_deliver_fs_store_data(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter(""); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + afs_pages_written_back(vnode, call); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.StoreData64 operation type. + */ +static const struct afs_call_type yfs_RXYFSStoreData64 = { + .name = "YFS.StoreData64", + .op = yfs_FS_StoreData64, + .deliver = yfs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +/* + * Store a set of pages to a large file. + */ +int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + loff_t size, pos, i_size; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + size = (loff_t)to - (loff_t)offset; + if (first != last) + size += (loff_t)(last - first) << PAGE_SHIFT; + pos = (loff_t)first << PAGE_SHIFT; + pos += offset; + + i_size = i_size_read(&vnode->vfs_inode); + if (pos + size > i_size) + i_size = size + pos; + + _debug("size %llx, at %llx, i_size %llx", + (unsigned long long)size, (unsigned long long)pos, + (unsigned long long)i_size); + + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64, + sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(struct yfs_xdr_u64) * 3, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->mapping = mapping; + call->reply[0] = vnode; + call->first = first; + call->last = last; + call->first_offset = offset; + call->last_to = to; + call->send_pages = true; + call->expected_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSStoreStatus_mtime(bp, &vnode->vfs_inode.i_mtime); + bp = xdr_encode_u64(bp, pos); + bp = xdr_encode_u64(bp, size); + bp = xdr_encode_u64(bp, i_size); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * deliver reply data to an FS.StoreStatus + */ +static int yfs_deliver_fs_store_status(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter(""); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.StoreStatus operation type + */ +static const struct afs_call_type yfs_RXYFSStoreStatus = { + .name = "YFS.StoreStatus", + .op = yfs_FS_StoreStatus, + .deliver = yfs_deliver_fs_store_status, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = { + .name = "YFS.StoreData64", + .op = yfs_FS_StoreData64, + .deliver = yfs_deliver_fs_store_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set the attributes on a file, using YFS.StoreData64 rather than + * YFS.StoreStatus so as to alter the file size also. + */ +static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64_as_Status, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(struct yfs_xdr_u64) * 3, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->expected_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFS_StoreStatus(bp, attr); + bp = xdr_encode_u64(bp, 0); /* position of start of write */ + bp = xdr_encode_u64(bp, 0); /* size of write */ + bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Set the attributes on a file, using YFS.StoreData64 if there's a change in + * file size, and YFS.StoreStatus otherwise. + */ +int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + if (attr->ia_valid & ATTR_SIZE) + return yfs_fs_setattr_size(fc, attr); + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->expected_version = vnode->status.data_version; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTORESTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFS_StoreStatus(bp, attr); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.GetVolumeStatus operation. + */ +static int yfs_deliver_fs_get_volume_status(struct afs_call *call) +{ + const __be32 *bp; + char *p; + u32 size; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->unmarshall++; + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus)); + + /* extract the returned status record */ + case 1: + _debug("extract status"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]); + call->unmarshall++; + afs_extract_to_tmp(call); + + /* extract the volume name length */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("volname length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_volname_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); + call->unmarshall++; + + /* extract the volume name */ + case 3: + _debug("extract volname"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->reply[2]; + p[call->count] = 0; + _debug("volname '%s'", p); + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the offline message length */ + case 4: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("offline msg length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_offline_msg_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); + call->unmarshall++; + + /* extract the offline message */ + case 5: + _debug("extract offline"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->reply[2]; + p[call->count] = 0; + _debug("offline '%s'", p); + + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the message of the day length */ + case 6: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("motd length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_motd_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); + call->unmarshall++; + + /* extract the message of the day */ + case 7: + _debug("extract motd"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + p = call->reply[2]; + p[call->count] = 0; + _debug("motd '%s'", p); + + call->unmarshall++; + + case 8: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * Destroy a YFS.GetVolumeStatus call. + */ +static void yfs_get_volume_status_call_destructor(struct afs_call *call) +{ + kfree(call->reply[2]); + call->reply[2] = NULL; + afs_flat_call_destructor(call); +} + +/* + * YFS.GetVolumeStatus operation type + */ +static const struct afs_call_type yfs_RXYFSGetVolumeStatus = { + .name = "YFS.GetVolumeStatus", + .op = yfs_FS_GetVolumeStatus, + .deliver = yfs_deliver_fs_get_volume_status, + .destructor = yfs_get_volume_status_call_destructor, +}; + +/* + * fetch the status of a volume + */ +int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, + struct afs_volume_status *vs) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + void *tmpbuf; + + _enter(""); + + tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_u64), + sizeof(struct yfs_xdr_YFSFetchVolumeStatus) + + sizeof(__be32)); + if (!call) { + kfree(tmpbuf); + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = vs; + call->reply[2] = tmpbuf; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_u64(bp, vnode->fid.vid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an YFS.SetLock, YFS.ExtendLock or YFS.ReleaseLock + */ +static int yfs_deliver_fs_xxxx_lock(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.SetLock operation type + */ +static const struct afs_call_type yfs_RXYFSSetLock = { + .name = "YFS.SetLock", + .op = yfs_FS_SetLock, + .deliver = yfs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * YFS.ExtendLock operation type + */ +static const struct afs_call_type yfs_RXYFSExtendLock = { + .name = "YFS.ExtendLock", + .op = yfs_FS_ExtendLock, + .deliver = yfs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * YFS.ReleaseLock operation type + */ +static const struct afs_call_type yfs_RXYFSReleaseLock = { + .name = "YFS.ReleaseLock", + .op = yfs_FS_ReleaseLock, + .deliver = yfs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set a lock on a file + */ +int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &yfs_RXYFSSetLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(__be32), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSETLOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_u32(bp, type); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * extend a lock on a file + */ +int yfs_fs_extend_lock(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &yfs_RXYFSExtendLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSEXTENDLOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * release a lock on a file + */ +int yfs_fs_release_lock(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &yfs_RXYFSReleaseLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSRELEASELOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.FetchStatus with no vnode. + */ +static int yfs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_file_status *status = call->reply[1]; + struct afs_callback *callback = call->reply[2]; + struct afs_volsync *volsync = call->reply[3]; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack_raw(&bp, callback); + xdr_decode_YFSVolSync(&bp, volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.FetchStatus operation type + */ +static const struct afs_call_type yfs_RXYFSFetchStatus = { + .name = "YFS.FetchStatus", + .op = yfs_FS_FetchStatus, + .deliver = yfs_deliver_fs_fetch_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a fid without needing a vnode handle. + */ +int yfs_fs_fetch_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fid, + struct afs_file_status *status, + struct afs_callback *callback, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), fid->vid, fid->vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = status; + call->reply[2] = callback; + call->reply[3] = volsync; + call->expected_version = 1; /* vnode->status.data_version */ + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, fid); + yfs_check_req(call, bp); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an YFS.InlineBulkStatus call + */ +static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) +{ + struct afs_file_status *statuses; + struct afs_callback *callbacks; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + u32 tmp; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* Extract the file status count and array in two steps */ + case 1: + _debug("extract status count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("status count: %u/%u", tmp, call->count2); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_count); + + call->count = 0; + call->unmarshall++; + more_counts: + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus)); + + case 2: + _debug("extract status array %u", call->count); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + statuses = call->reply[1]; + ret = yfs_decode_status(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL); + if (ret < 0) + return ret; + + call->count++; + if (call->count < call->count2) + goto more_counts; + + call->count = 0; + call->unmarshall++; + afs_extract_to_tmp(call); + + /* Extract the callback count and array in two steps */ + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("CB count: %u", tmp); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_cb_count); + call->count = 0; + call->unmarshall++; + more_cbs: + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack)); + + case 4: + _debug("extract CB array"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + _debug("unmarshall CB array"); + bp = call->buffer; + callbacks = call->reply[2]; + xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]); + statuses = call->reply[1]; + if (call->count == 0 && vnode && statuses[0].abort_code == 0) { + bp = call->buffer; + xdr_decode_YFSCallBack(call, vnode, &bp); + } + call->count++; + if (call->count < call->count2) + goto more_cbs; + + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync)); + call->unmarshall++; + + case 5: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSVolSync(&bp, call->reply[3]); + + call->unmarshall++; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.InlineBulkStatus operation type + */ +static const struct afs_call_type yfs_RXYFSInlineBulkStatus = { + .name = "YFS.InlineBulkStatus", + .op = yfs_FS_InlineBulkStatus, + .deliver = yfs_deliver_fs_inline_bulk_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for up to 1024 files + */ +int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fids, + struct afs_file_status *statuses, + struct afs_callback *callbacks, + unsigned int nr_fids, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + int i; + + _enter(",%x,{%llx:%llu},%u", + key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); + + call = afs_alloc_flat_call(net, &yfs_RXYFSInlineBulkStatus, + sizeof(__be32) + + sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) * nr_fids, + sizeof(struct yfs_xdr_YFSFetchStatus)); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = statuses; + call->reply[2] = callbacks; + call->reply[3] = volsync; + call->count2 = nr_fids; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPCFlags */ + bp = xdr_encode_u32(bp, nr_fids); + for (i = 0; i < nr_fids; i++) + bp = xdr_encode_YFSFid(bp, &fids[i]); + yfs_check_req(call, bp); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &fids[0]); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} diff --git a/fs/block_dev.c b/fs/block_dev.c @@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) dio->size = 0; dio->multi_bio = false; - dio->should_dirty = is_read && (iter->type == ITER_IOVEC); + dio->should_dirty = is_read && iter_is_iovec(iter); blk_start_plug(&plug); for (;;) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c @@ -615,7 +615,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, more = len < iov_iter_count(to); - if (unlikely(to->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(to))) { ret = iov_iter_get_pages_alloc(to, &pages, len, &page_off); if (ret <= 0) { @@ -662,7 +662,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ret += zlen; } - if (unlikely(to->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(to))) { if (ret > 0) { iov_iter_advance(to, ret); off += ret; @@ -815,7 +815,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) aio_req->total_len = rc + zlen; } - iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs, + iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs, osd_data->num_bvecs, osd_data->bvec_pos.iter.bi_size); iov_iter_advance(&i, rc); @@ -1038,8 +1038,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int zlen = min_t(size_t, len - ret, size - pos - ret); - iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages, - len); + iov_iter_bvec(&i, READ, bvecs, num_pages, len); iov_iter_advance(&i, ret); iov_iter_zero(zlen, &i); ret += zlen; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c @@ -589,7 +589,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, { struct msghdr smb_msg; struct kvec iov = {.iov_base = buf, .iov_len = to_read}; - iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read); + iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read); return cifs_readv_from_socket(server, &smb_msg); } @@ -601,7 +601,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, struct msghdr smb_msg; struct bio_vec bv = { .bv_page = page, .bv_len = to_read, .bv_offset = page_offset}; - iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read); + iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read); return cifs_readv_from_socket(server, &smb_msg); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c @@ -3004,7 +3004,7 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) size_t copy = min_t(size_t, remaining, PAGE_SIZE); size_t written; - if (unlikely(iter->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(iter))) { void *addr = kmap_atomic(page); written = copy_to_iter(addr, copy, iter); @@ -3316,7 +3316,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (to->type == ITER_IOVEC) + if (iter_is_iovec(to)) ctx->should_dirty = true; rc = setup_aio_ctx_iter(ctx, to, READ); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c @@ -788,7 +788,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) struct page **pages = NULL; struct bio_vec *bv = NULL; - if (iter->type & ITER_KVEC) { + if (iov_iter_is_kvec(iter)) { memcpy(&ctx->iter, iter, sizeof(struct iov_iter)); ctx->len = count; iov_iter_advance(iter, count); @@ -859,7 +859,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) ctx->bv = bv; ctx->len = saved_len - count; ctx->npages = npages; - iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len); + iov_iter_bvec(&ctx->iter, rw, ctx->bv, npages, ctx->len); return 0; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c @@ -3152,13 +3152,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, return 0; } - iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len); + iov_iter_bvec(&iter, WRITE, bvec, npages, data_len); } else if (buf_len >= data_offset + data_len) { /* read response payload is in buf */ WARN_ONCE(npages > 0, "read data can be either in buf or in pages"); iov.iov_base = buf + data_offset; iov.iov_len = data_len; - iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len); + iov_iter_kvec(&iter, WRITE, &iov, 1, data_len); } else { /* read response payload cannot be in both buf and pages */ WARN_ONCE(1, "buf can not contain only a part of read data"); diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c @@ -2054,14 +2054,22 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) info->smbd_recv_pending++; - switch (msg->msg_iter.type) { - case READ | ITER_KVEC: + if (iov_iter_rw(&msg->msg_iter) == WRITE) { + /* It's a bug in upper layer to get there */ + cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n", + iov_iter_rw(&msg->msg_iter)); + rc = -EINVAL; + goto out; + } + + switch (iov_iter_type(&msg->msg_iter)) { + case ITER_KVEC: buf = msg->msg_iter.kvec->iov_base; to_read = msg->msg_iter.kvec->iov_len; rc = smbd_recv_buf(info, buf, to_read); break; - case READ | ITER_BVEC: + case ITER_BVEC: page = msg->msg_iter.bvec->bv_page; page_offset = msg->msg_iter.bvec->bv_offset; to_read = msg->msg_iter.bvec->bv_len; @@ -2071,10 +2079,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) default: /* It's a bug in upper layer to get there */ cifs_dbg(VFS, "CIFS: invalid msg type %d\n", - msg->msg_iter.type); + iov_iter_type(&msg->msg_iter)); rc = -EINVAL; } +out: info->smbd_recv_pending--; wake_up(&info->wait_smbd_recv_pending); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c @@ -316,8 +316,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, .iov_base = &rfc1002_marker, .iov_len = 4 }; - iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov, - 1, 4); + iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) goto uncork; @@ -338,8 +337,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, size += iov[i].iov_len; } - iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, - iov, n_vec, size); + iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) @@ -355,7 +353,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, rqst_page_get_length(&rqst[j], i, &bvec.bv_len, &bvec.bv_offset); - iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC, + iov_iter_bvec(&smb_msg.msg_iter, WRITE, &bvec, 1, bvec.bv_len); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) diff --git a/fs/direct-io.c b/fs/direct-io.c @@ -1313,7 +1313,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, spin_lock_init(&dio->bio_lock); dio->refcount = 1; - dio->should_dirty = (iter->type == ITER_IOVEC); + dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; sdio.iter = iter; sdio.final_block_in_request = end >> blkbits; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c @@ -674,7 +674,7 @@ static int receive_from_sock(struct connection *con) nvec = 2; } len = iov[0].iov_len + iov[1].iov_len; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len); + iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len); r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL); if (ret <= 0) diff --git a/fs/fuse/file.c b/fs/fuse/file.c @@ -1275,7 +1275,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, ssize_t ret = 0; /* Special case for kernel I/O: can copy directly into the buffer */ - if (ii->type & ITER_KVEC) { + if (iov_iter_is_kvec(ii)) { unsigned long user_addr = fuse_get_user_addr(ii); size_t frag_size = fuse_get_frag_size(ii, *nbytesp); diff --git a/fs/iomap.c b/fs/iomap.c @@ -1795,7 +1795,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (pos >= dio->i_size) goto out_free_dio; - if (iter->type == ITER_IOVEC) + if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ) dio->flags |= IOMAP_DIO_DIRTY; } else { flags |= IOMAP_WRITE; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c @@ -923,7 +923,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, int host_err; trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count); + iov_iter_kvec(&iter, READ, vec, vlen, *count); host_err = vfs_iter_read(file, &iter, &offset, 0); return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); } @@ -999,7 +999,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (stable && !use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt); + iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c @@ -916,7 +916,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) { struct kvec vec = { .iov_len = len, .iov_base = data, }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len); + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len); return sock_recvmsg(sock, &msg, MSG_DONTWAIT); } diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c @@ -25,7 +25,7 @@ static int read_one_page(struct page *page) struct iov_iter to; struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE}; - iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE); + iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE); gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpage called with page %p\n", diff --git a/fs/splice.c b/fs/splice.c @@ -301,7 +301,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct kiocb kiocb; int idx, ret; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); + iov_iter_pipe(&to, READ, pipe, len); idx = to.idx; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; @@ -386,7 +386,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, */ offset = *ppos & ~PAGE_MASK; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); + iov_iter_pipe(&to, READ, pipe, len + offset); res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base); if (res <= 0) @@ -745,8 +745,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, left -= this_len; } - iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n, - sd.total_len - left); + iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); ret = vfs_iter_write(out, &from, &sd.pos, 0); if (ret <= 0) break; diff --git a/include/linux/uio.h b/include/linux/uio.h @@ -21,15 +21,16 @@ struct kvec { size_t iov_len; }; -enum { +enum iter_type { ITER_IOVEC = 0, ITER_KVEC = 2, ITER_BVEC = 4, ITER_PIPE = 8, + ITER_DISCARD = 16, }; struct iov_iter { - int type; + unsigned int type; size_t iov_offset; size_t count; union { @@ -47,6 +48,41 @@ struct iov_iter { }; }; +static inline enum iter_type iov_iter_type(const struct iov_iter *i) +{ + return i->type & ~(READ | WRITE); +} + +static inline bool iter_is_iovec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_IOVEC; +} + +static inline bool iov_iter_is_kvec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_KVEC; +} + +static inline bool iov_iter_is_bvec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_BVEC; +} + +static inline bool iov_iter_is_pipe(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_PIPE; +} + +static inline bool iov_iter_is_discard(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_DISCARD; +} + +static inline unsigned char iov_iter_rw(const struct iov_iter *i) +{ + return i->type & (READ | WRITE); +} + /* * Total number of bytes covered by an iovec. * @@ -74,7 +110,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) } #define iov_for_each(iov, iter, start) \ - if (!((start).type & (ITER_BVEC | ITER_PIPE))) \ + if (iov_iter_type(start) == ITER_IOVEC || \ + iov_iter_type(start) == ITER_KVEC) \ for (iter = (start); \ (iter).count && \ ((iov = iov_iter_iovec(&(iter))), 1); \ @@ -181,14 +218,15 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); -void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, +void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); -void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count); -void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); -void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, @@ -202,19 +240,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i) return i->count; } -static inline bool iter_is_iovec(const struct iov_iter *i) -{ - return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); -} - -/* - * Get one of READ or WRITE out of iter->type without any other flags OR'd in - * with it. - * - * The ?: is just for type safety. - */ -#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE)) - /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h @@ -54,6 +54,35 @@ enum afs_fs_operation { afs_FS_StoreData64 = 65538, /* AFS Store file data */ afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */ afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */ + + yfs_FS_FetchData = 130, /* YFS Fetch file data */ + yfs_FS_FetchACL = 64131, /* YFS Fetch file ACL */ + yfs_FS_FetchStatus = 64132, /* YFS Fetch file status */ + yfs_FS_StoreACL = 64134, /* YFS Store file ACL */ + yfs_FS_StoreStatus = 64135, /* YFS Store file status */ + yfs_FS_RemoveFile = 64136, /* YFS Remove a file */ + yfs_FS_CreateFile = 64137, /* YFS Create a file */ + yfs_FS_Rename = 64138, /* YFS Rename or move a file or directory */ + yfs_FS_Symlink = 64139, /* YFS Create a symbolic link */ + yfs_FS_Link = 64140, /* YFS Create a hard link */ + yfs_FS_MakeDir = 64141, /* YFS Create a directory */ + yfs_FS_RemoveDir = 64142, /* YFS Remove a directory */ + yfs_FS_GetVolumeStatus = 64149, /* YFS Get volume status information */ + yfs_FS_SetVolumeStatus = 64150, /* YFS Set volume status information */ + yfs_FS_SetLock = 64156, /* YFS Request a file lock */ + yfs_FS_ExtendLock = 64157, /* YFS Extend a file lock */ + yfs_FS_ReleaseLock = 64158, /* YFS Release a file lock */ + yfs_FS_Lookup = 64161, /* YFS lookup file in directory */ + yfs_FS_FlushCPS = 64165, + yfs_FS_FetchOpaqueACL = 64168, + yfs_FS_WhoAmI = 64170, + yfs_FS_RemoveACL = 64171, + yfs_FS_RemoveFile2 = 64173, + yfs_FS_StoreOpaqueACL2 = 64174, + yfs_FS_InlineBulkStatus = 64536, /* YFS Fetch multiple file statuses with errors */ + yfs_FS_FetchData64 = 64537, /* YFS Fetch file data */ + yfs_FS_StoreData64 = 64538, /* YFS Store file data */ + yfs_FS_UpdateSymlink = 64540, }; enum afs_vl_operation { @@ -84,6 +113,44 @@ enum afs_edit_dir_reason { afs_edit_dir_for_unlink, }; +enum afs_eproto_cause { + afs_eproto_bad_status, + afs_eproto_cb_count, + afs_eproto_cb_fid_count, + afs_eproto_file_type, + afs_eproto_ibulkst_cb_count, + afs_eproto_ibulkst_count, + afs_eproto_motd_len, + afs_eproto_offline_msg_len, + afs_eproto_volname_len, + afs_eproto_yvl_fsendpt4_len, + afs_eproto_yvl_fsendpt6_len, + afs_eproto_yvl_fsendpt_num, + afs_eproto_yvl_fsendpt_type, + afs_eproto_yvl_vlendpt4_len, + afs_eproto_yvl_vlendpt6_len, + afs_eproto_yvl_vlendpt_type, +}; + +enum afs_io_error { + afs_io_error_cm_reply, + afs_io_error_extract, + afs_io_error_fs_probe_fail, + afs_io_error_vl_lookup_fail, + afs_io_error_vl_probe_fail, +}; + +enum afs_file_error { + afs_file_error_dir_bad_magic, + afs_file_error_dir_big, + afs_file_error_dir_missing_page, + afs_file_error_dir_over_end, + afs_file_error_dir_small, + afs_file_error_dir_unmarked_ext, + afs_file_error_mntpt, + afs_file_error_writeback_fail, +}; + #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -119,7 +186,34 @@ enum afs_edit_dir_reason { EM(afs_FS_FetchData64, "FS.FetchData64") \ EM(afs_FS_StoreData64, "FS.StoreData64") \ EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \ - E_(afs_FS_GetCapabilities, "FS.GetCapabilities") + EM(afs_FS_GetCapabilities, "FS.GetCapabilities") \ + EM(yfs_FS_FetchACL, "YFS.FetchACL") \ + EM(yfs_FS_FetchStatus, "YFS.FetchStatus") \ + EM(yfs_FS_StoreACL, "YFS.StoreACL") \ + EM(yfs_FS_StoreStatus, "YFS.StoreStatus") \ + EM(yfs_FS_RemoveFile, "YFS.RemoveFile") \ + EM(yfs_FS_CreateFile, "YFS.CreateFile") \ + EM(yfs_FS_Rename, "YFS.Rename") \ + EM(yfs_FS_Symlink, "YFS.Symlink") \ + EM(yfs_FS_Link, "YFS.Link") \ + EM(yfs_FS_MakeDir, "YFS.MakeDir") \ + EM(yfs_FS_RemoveDir, "YFS.RemoveDir") \ + EM(yfs_FS_GetVolumeStatus, "YFS.GetVolumeStatus") \ + EM(yfs_FS_SetVolumeStatus, "YFS.SetVolumeStatus") \ + EM(yfs_FS_SetLock, "YFS.SetLock") \ + EM(yfs_FS_ExtendLock, "YFS.ExtendLock") \ + EM(yfs_FS_ReleaseLock, "YFS.ReleaseLock") \ + EM(yfs_FS_Lookup, "YFS.Lookup") \ + EM(yfs_FS_FlushCPS, "YFS.FlushCPS") \ + EM(yfs_FS_FetchOpaqueACL, "YFS.FetchOpaqueACL") \ + EM(yfs_FS_WhoAmI, "YFS.WhoAmI") \ + EM(yfs_FS_RemoveACL, "YFS.RemoveACL") \ + EM(yfs_FS_RemoveFile2, "YFS.RemoveFile2") \ + EM(yfs_FS_StoreOpaqueACL2, "YFS.StoreOpaqueACL2") \ + EM(yfs_FS_InlineBulkStatus, "YFS.InlineBulkStatus") \ + EM(yfs_FS_FetchData64, "YFS.FetchData64") \ + EM(yfs_FS_StoreData64, "YFS.StoreData64") \ + E_(yfs_FS_UpdateSymlink, "YFS.UpdateSymlink") #define afs_vl_operations \ EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \ @@ -146,6 +240,40 @@ enum afs_edit_dir_reason { EM(afs_edit_dir_for_symlink, "Symlnk") \ E_(afs_edit_dir_for_unlink, "Unlink") +#define afs_eproto_causes \ + EM(afs_eproto_bad_status, "BadStatus") \ + EM(afs_eproto_cb_count, "CbCount") \ + EM(afs_eproto_cb_fid_count, "CbFidCount") \ + EM(afs_eproto_file_type, "FileTYpe") \ + EM(afs_eproto_ibulkst_cb_count, "IBS.CbCount") \ + EM(afs_eproto_ibulkst_count, "IBS.FidCount") \ + EM(afs_eproto_motd_len, "MotdLen") \ + EM(afs_eproto_offline_msg_len, "OfflineMsgLen") \ + EM(afs_eproto_volname_len, "VolNameLen") \ + EM(afs_eproto_yvl_fsendpt4_len, "YVL.FsEnd4Len") \ + EM(afs_eproto_yvl_fsendpt6_len, "YVL.FsEnd6Len") \ + EM(afs_eproto_yvl_fsendpt_num, "YVL.FsEndCount") \ + EM(afs_eproto_yvl_fsendpt_type, "YVL.FsEndType") \ + EM(afs_eproto_yvl_vlendpt4_len, "YVL.VlEnd4Len") \ + EM(afs_eproto_yvl_vlendpt6_len, "YVL.VlEnd6Len") \ + E_(afs_eproto_yvl_vlendpt_type, "YVL.VlEndType") + +#define afs_io_errors \ + EM(afs_io_error_cm_reply, "CM_REPLY") \ + EM(afs_io_error_extract, "EXTRACT") \ + EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \ + EM(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") \ + E_(afs_io_error_vl_probe_fail, "VL_PROBE_FAIL") + +#define afs_file_errors \ + EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \ + EM(afs_file_error_dir_big, "DIR_BIG") \ + EM(afs_file_error_dir_missing_page, "DIR_MISSING_PAGE") \ + EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \ + EM(afs_file_error_dir_small, "DIR_SMALL") \ + EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \ + EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \ + E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED") /* * Export enum symbols via userspace. @@ -160,6 +288,9 @@ afs_fs_operations; afs_vl_operations; afs_edit_dir_ops; afs_edit_dir_reasons; +afs_eproto_causes; +afs_io_errors; +afs_file_errors; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -170,17 +301,16 @@ afs_edit_dir_reasons; #define EM(a, b) { a, b }, #define E_(a, b) { a, b } -TRACE_EVENT(afs_recv_data, - TP_PROTO(struct afs_call *call, unsigned count, unsigned offset, +TRACE_EVENT(afs_receive_data, + TP_PROTO(struct afs_call *call, struct iov_iter *iter, bool want_more, int ret), - TP_ARGS(call, count, offset, want_more, ret), + TP_ARGS(call, iter, want_more, ret), TP_STRUCT__entry( + __field(loff_t, remain ) __field(unsigned int, call ) __field(enum afs_call_state, state ) - __field(unsigned int, count ) - __field(unsigned int, offset ) __field(unsigned short, unmarshall ) __field(bool, want_more ) __field(int, ret ) @@ -190,17 +320,18 @@ TRACE_EVENT(afs_recv_data, __entry->call = call->debug_id; __entry->state = call->state; __entry->unmarshall = call->unmarshall; - __entry->count = count; - __entry->offset = offset; + __entry->remain = iov_iter_count(iter); __entry->want_more = want_more; __entry->ret = ret; ), - TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d", + TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d", __entry->call, - __entry->state, __entry->unmarshall, - __entry->offset, __entry->count, - __entry->want_more, __entry->ret) + __entry->remain, + __entry->unmarshall, + __entry->want_more, + __entry->state, + __entry->ret) ); TRACE_EVENT(afs_notify_call, @@ -301,7 +432,7 @@ TRACE_EVENT(afs_make_fs_call, } ), - TP_printk("c=%08x %06x:%06x:%06x %s", + TP_printk("c=%08x %06llx:%06llx:%06x %s", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -555,24 +686,70 @@ TRACE_EVENT(afs_edit_dir, ); TRACE_EVENT(afs_protocol_error, - TP_PROTO(struct afs_call *call, int error, const void *where), + TP_PROTO(struct afs_call *call, int error, enum afs_eproto_cause cause), + + TP_ARGS(call, error, cause), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(int, error ) + __field(enum afs_eproto_cause, cause ) + ), + + TP_fast_assign( + __entry->call = call ? call->debug_id : 0; + __entry->error = error; + __entry->cause = cause; + ), + + TP_printk("c=%08x r=%d %s", + __entry->call, __entry->error, + __print_symbolic(__entry->cause, afs_eproto_causes)) + ); + +TRACE_EVENT(afs_io_error, + TP_PROTO(unsigned int call, int error, enum afs_io_error where), TP_ARGS(call, error, where), TP_STRUCT__entry( __field(unsigned int, call ) __field(int, error ) - __field(const void *, where ) + __field(enum afs_io_error, where ) ), TP_fast_assign( - __entry->call = call ? call->debug_id : 0; + __entry->call = call; + __entry->error = error; + __entry->where = where; + ), + + TP_printk("c=%08x r=%d %s", + __entry->call, __entry->error, + __print_symbolic(__entry->where, afs_io_errors)) + ); + +TRACE_EVENT(afs_file_error, + TP_PROTO(struct afs_vnode *vnode, int error, enum afs_file_error where), + + TP_ARGS(vnode, error, where), + + TP_STRUCT__entry( + __field_struct(struct afs_fid, fid ) + __field(int, error ) + __field(enum afs_file_error, where ) + ), + + TP_fast_assign( + __entry->fid = vnode->fid; __entry->error = error; __entry->where = where; ), - TP_printk("c=%08x r=%d sp=%pSR", - __entry->call, __entry->error, __entry->where) + TP_printk("%llx:%llx:%x r=%d %s", + __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique, + __entry->error, + __print_symbolic(__entry->where, afs_file_errors)) ); TRACE_EVENT(afs_cm_no_server, diff --git a/lib/iov_iter.c b/lib/iov_iter.c @@ -83,6 +83,7 @@ const struct kvec *kvec; \ struct kvec v; \ iterate_kvec(i, n, v, kvec, skip, (K)) \ + } else if (unlikely(i->type & ITER_DISCARD)) { \ } else { \ const struct iovec *iov; \ struct iovec v; \ @@ -114,6 +115,8 @@ } \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ + } else if (unlikely(i->type & ITER_DISCARD)) { \ + skip += n; \ } else { \ const struct iovec *iov; \ struct iovec v; \ @@ -428,17 +431,19 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) } EXPORT_SYMBOL(iov_iter_fault_in_readable); -void iov_iter_init(struct iov_iter *i, int direction, +void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count) { + WARN_ON(direction & ~(READ | WRITE)); + direction &= READ | WRITE; + /* It will get better. Eventually... */ if (uaccess_kernel()) { - direction |= ITER_KVEC; - i->type = direction; + i->type = ITER_KVEC | direction; i->kvec = (struct kvec *)iov; } else { - i->type = direction; + i->type = ITER_IOVEC | direction; i->iov = iov; } i->nr_segs = nr_segs; @@ -558,7 +563,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); @@ -658,7 +663,7 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); @@ -692,7 +697,7 @@ EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -712,7 +717,7 @@ EXPORT_SYMBOL(_copy_from_iter); bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return false; } @@ -739,7 +744,7 @@ EXPORT_SYMBOL(_copy_from_iter_full); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -773,7 +778,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache); size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -794,7 +799,7 @@ EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return false; } @@ -836,7 +841,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, size_t wanted = copy_to_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; - } else if (likely(!(i->type & ITER_PIPE))) + } else if (unlikely(iov_iter_is_discard(i))) + return bytes; + else if (likely(!iov_iter_is_pipe(i))) return copy_page_to_iter_iovec(page, offset, bytes, i); else return copy_page_to_iter_pipe(page, offset, bytes, i); @@ -848,7 +855,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, { if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return 0; } @@ -888,7 +895,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i) size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return pipe_zero(bytes, i); iterate_and_advance(i, bytes, v, clear_user(v.iov_base, v.iov_len), @@ -908,7 +915,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, kunmap_atomic(kaddr); return 0; } - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { kunmap_atomic(kaddr); WARN_ON(1); return 0; @@ -972,10 +979,14 @@ static void pipe_advance(struct iov_iter *i, size_t size) void iov_iter_advance(struct iov_iter *i, size_t size) { - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { pipe_advance(i, size); return; } + if (unlikely(iov_iter_is_discard(i))) { + i->count -= size; + return; + } iterate_and_advance(i, size, v, 0, 0, 0) } EXPORT_SYMBOL(iov_iter_advance); @@ -987,7 +998,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) if (WARN_ON(unroll > MAX_RW_COUNT)) return; i->count += unroll; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; int idx = i->idx; size_t off = i->iov_offset; @@ -1011,12 +1022,14 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) pipe_truncate(i); return; } + if (unlikely(iov_iter_is_discard(i))) + return; if (unroll <= i->iov_offset) { i->iov_offset -= unroll; return; } unroll -= i->iov_offset; - if (i->type & ITER_BVEC) { + if (iov_iter_is_bvec(i)) { const struct bio_vec *bvec = i->bvec; while (1) { size_t n = (--bvec)->bv_len; @@ -1049,23 +1062,25 @@ EXPORT_SYMBOL(iov_iter_revert); */ size_t iov_iter_single_seg_count(const struct iov_iter *i) { - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return i->count; // it is a silly place, anyway if (i->nr_segs == 1) return i->count; - else if (i->type & ITER_BVEC) + if (unlikely(iov_iter_is_discard(i))) + return i->count; + else if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); else return min(i->count, i->iov->iov_len - i->iov_offset); } EXPORT_SYMBOL(iov_iter_single_seg_count); -void iov_iter_kvec(struct iov_iter *i, int direction, +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count) { - BUG_ON(!(direction & ITER_KVEC)); - i->type = direction; + WARN_ON(direction & ~(READ | WRITE)); + i->type = ITER_KVEC | (direction & (READ | WRITE)); i->kvec = kvec; i->nr_segs = nr_segs; i->iov_offset = 0; @@ -1073,12 +1088,12 @@ void iov_iter_kvec(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_kvec); -void iov_iter_bvec(struct iov_iter *i, int direction, +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count) { - BUG_ON(!(direction & ITER_BVEC)); - i->type = direction; + WARN_ON(direction & ~(READ | WRITE)); + i->type = ITER_BVEC | (direction & (READ | WRITE)); i->bvec = bvec; i->nr_segs = nr_segs; i->iov_offset = 0; @@ -1086,13 +1101,13 @@ void iov_iter_bvec(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_bvec); -void iov_iter_pipe(struct iov_iter *i, int direction, +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count) { - BUG_ON(direction != ITER_PIPE); + BUG_ON(direction != READ); WARN_ON(pipe->nrbufs == pipe->buffers); - i->type = direction; + i->type = ITER_PIPE | READ; i->pipe = pipe; i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); i->iov_offset = 0; @@ -1101,12 +1116,30 @@ void iov_iter_pipe(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_pipe); +/** + * iov_iter_discard - Initialise an I/O iterator that discards data + * @i: The iterator to initialise. + * @direction: The direction of the transfer. + * @count: The size of the I/O buffer in bytes. + * + * Set up an I/O iterator that just discards everything that's written to it. + * It's only available as a READ iterator. + */ +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) +{ + BUG_ON(direction != READ); + i->type = ITER_DISCARD | READ; + i->count = count; + i->iov_offset = 0; +} +EXPORT_SYMBOL(iov_iter_discard); + unsigned long iov_iter_alignment(const struct iov_iter *i) { unsigned long res = 0; size_t size = i->count; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) return size | i->iov_offset; return size; @@ -1125,7 +1158,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) unsigned long res = 0; size_t size = i->count; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return ~0U; } @@ -1193,8 +1226,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages(i, pages, maxsize, maxpages, start); + if (unlikely(iov_iter_is_discard(i))) + return -EFAULT; + iterate_all_kinds(i, maxsize, v, ({ unsigned long addr = (unsigned long)v.iov_base; size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); @@ -1205,7 +1241,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages); if (unlikely(res < 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; @@ -1270,8 +1306,11 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages_alloc(i, pages, maxsize, start); + if (unlikely(iov_iter_is_discard(i))) + return -EFAULT; + iterate_all_kinds(i, maxsize, v, ({ unsigned long addr = (unsigned long)v.iov_base; size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); @@ -1283,7 +1322,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p); if (unlikely(res < 0)) { kvfree(p); return res; @@ -1313,7 +1352,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return 0; } @@ -1355,7 +1394,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return false; } @@ -1400,7 +1439,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; } @@ -1442,8 +1481,10 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (!size) return 0; + if (unlikely(iov_iter_is_discard(i))) + return 0; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; size_t off; int idx; @@ -1481,11 +1522,13 @@ EXPORT_SYMBOL(iov_iter_npages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) { *new = *old; - if (unlikely(new->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(new))) { WARN_ON(1); return NULL; } - if (new->type & ITER_BVEC) + if (unlikely(iov_iter_is_discard(new))) + return NULL; + if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), flags); diff --git a/mm/filemap.c b/mm/filemap.c @@ -2049,7 +2049,7 @@ find_page: !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; /* pipes can't handle partially uptodate pages */ - if (unlikely(iter->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(iter))) goto page_not_up_to_date; if (!trylock_page(page)) goto page_not_up_to_date; diff --git a/mm/page_io.c b/mm/page_io.c @@ -294,7 +294,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, }; struct iov_iter from; - iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE); + iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); diff --git a/net/9p/client.c b/net/9p/client.c @@ -2066,7 +2066,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) struct kvec kv = {.iov_base = data, .iov_len = count}; struct iov_iter to; - iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count); + iov_iter_kvec(&to, READ, &kv, 1, count); p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", fid->fid, (unsigned long long) offset, count); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c @@ -329,7 +329,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, if (!iov_iter_count(data)) return 0; - if (!(data->type & ITER_KVEC)) { + if (iov_iter_is_kvec(data)) { int n; /* * We allow only p9_max_pages pinned. We wait for the diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c @@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, iv.iov_len = skb->len; memset(&msg, 0, sizeof(msg)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len); + iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len); err = l2cap_chan_send(chan, &msg, skb->len); if (err > 0) { diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c @@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat memset(&msg, 0, sizeof(msg)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len); + iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len); l2cap_chan_send(chan, &msg, total_len); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c @@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) memset(&msg, 0, sizeof(msg)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len); + iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len); l2cap_chan_send(chan, &msg, 1 + len); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c @@ -513,7 +513,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) if (!buf) msg.msg_flags |= MSG_TRUNC; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; @@ -532,7 +532,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int r; BUG_ON(page_offset + length > PAGE_SIZE); - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); + iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; @@ -594,7 +594,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); + iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size); ret = sock_sendmsg(sock, &msg); if (ret == -EAGAIN) ret = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c @@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) EnterFunction(7); /* Receive a packet */ - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen); len = sock_recvmsg(sock, &msg, MSG_DONTWAIT); if (len < 0) return len; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c @@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, */ krflags = MSG_PEEK | MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, sizeof(struct smc_clc_msg_hdr)); len = sock_recvmsg(smc->clcsock, &msg, krflags); if (signal_pending(current)) { @@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen); + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen); krflags = MSG_WAITALL; len = sock_recvmsg(smc->clcsock, &msg, krflags); if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { diff --git a/net/socket.c b/net/socket.c @@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { - iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); return sock_sendmsg(sock, msg); } EXPORT_SYMBOL(kernel_sendmsg); @@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, if (!sock->ops->sendmsg_locked) return sock_no_sendmsg_locked(sk, msg, size); - iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg)); } @@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, mm_segment_t oldfs = get_fs(); int result; - iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); + iov_iter_kvec(&msg->msg_iter, READ, vec, num, size); set_fs(KERNEL_DS); result = sock_recvmsg(sock, msg, flags); set_fs(oldfs); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c @@ -336,7 +336,7 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen); + iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); if (base != 0) { iov_iter_advance(&msg.msg_iter, base); buflen -= base; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c @@ -361,7 +361,7 @@ static ssize_t xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, struct kvec *kvec, size_t count, size_t seek) { - iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count); + iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count); return xs_sock_recvmsg(sock, msg, flags, seek); } @@ -370,7 +370,7 @@ xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, struct bio_vec *bvec, unsigned long nr, size_t count, size_t seek) { - iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count); + iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count); return xs_sock_recvmsg(sock, msg, flags, seek); } diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c @@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) iov.iov_base = &s; iov.iov_len = sizeof(s); msg.msg_name = NULL; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len); ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c @@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, iov.iov_base = kaddr + offset; iov.iov_len = size; - iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); rc = tls_push_data(sk, &msg_iter, size, flags, TLS_RECORD_TYPE_DATA); kunmap(page); @@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags) { struct iov_iter msg_iter; - iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0); return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c @@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send); bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; unsigned char record_type = TLS_RECORD_TYPE_DATA; - bool is_kvec = msg->msg_iter.type & ITER_KVEC; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); size_t try_to_copy, copied = 0; struct sk_msg *msg_pl, *msg_en; @@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk, bool cmsg = false; int target, err = 0; long timeo; - bool is_kvec = msg->msg_iter.type & ITER_KVEC; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); int num_async = 0; flags |= nonblock;