whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 00c569b567c7f1f0da6162868fd02a9f29411805
parent f6b1495fba0b66cfa05efa0ca2370513b79b45b6
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu, 27 Dec 2018 17:12:30 -0800

Merge tag 'locks-v4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux

Pull file locking updates from Jeff Layton:
 "The main change in this set is Neil Brown's work to reduce the
  thundering herd problem when a heavily-contended file lock is
  released.

  Previously we'd always wake up all waiters when this occurred. With
  this set, we'll now we only wake up waiters that were blocked on the
  range being released"

* tag 'locks-v4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux:
  locks: Use inode_is_open_for_write
  fs/locks: remove unnecessary white space.
  fs/locks: merge posix_unblock_lock() and locks_delete_block()
  fs/locks: create a tree of dependent requests.
  fs/locks: change all *_conflict() functions to return bool.
  fs/locks: always delete_block after waiting.
  fs/locks: allow a lock request to block other requests.
  fs/locks: use properly initialized file_lock when unlocking.
  ocfs2: properly initial file_lock used for unlock.
  gfs2: properly initial file_lock used for unlock.
  NFS: use locks_copy_lock() to copy locks.
  fs/locks: split out __locks_wake_up_blocks().
  fs/locks: rename some lists and pointers.

Diffstat:
Mfs/cifs/file.c | 4++--
Mfs/gfs2/file.c | 10+++++-----
Mfs/lockd/svclock.c | 2+-
Mfs/locks.c | 344+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Mfs/nfs/nfs4proc.c | 6++++--
Mfs/nfsd/nfs4state.c | 6+++---
Mfs/ocfs2/locks.c | 10+++++-----
Minclude/linux/fs.h | 13+++++++++----
Minclude/trace/events/filelock.h | 16++++++++--------
9 files changed, 253 insertions(+), 158 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c @@ -1103,10 +1103,10 @@ try_again: rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); + rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); if (!rc) goto try_again; - posix_unblock_lock(flock); + locks_delete_block(flock); } return rc; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c @@ -1199,13 +1199,13 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) mutex_lock(&fp->f_fl_mutex); if (gfs2_holder_initialized(fl_gh)) { + struct file_lock request; if (fl_gh->gh_state == state) goto out; - locks_lock_file_wait(file, - &(struct file_lock) { - .fl_type = F_UNLCK, - .fl_flags = FL_FLOCK - }); + locks_init_lock(&request); + request.fl_type = F_UNLCK; + request.fl_flags = FL_FLOCK; + locks_lock_file_wait(file, &request); gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c @@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block) dprintk("lockd: unlinking block %p...\n", block); /* Remove block from list */ - status = posix_unblock_lock(&block->b_call->a_args.lock.fl); + status = locks_delete_block(&block->b_call->a_args.lock.fl); nlmsvc_remove_block(block); return status; } diff --git a/fs/locks.c b/fs/locks.c @@ -11,11 +11,11 @@ * * Miscellaneous edits, and a total rewrite of posix_lock_file() code. * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 - * + * * Converted file_lock_table to a linked list from an array, which eliminates * the limits on how many active file locks are open. * Chad Page (pageone@netcom.com), November 27, 1994 - * + * * Removed dependency on file descriptors. dup()'ed file descriptors now * get the same locks as the original file descriptors, and a close() on * any file descriptor removes ALL the locks on the file for the current @@ -41,7 +41,7 @@ * with a file pointer (filp). As a result they can be shared by a parent * process and its children after a fork(). They are removed when the last * file descriptor referring to the file pointer is closed (unless explicitly - * unlocked). + * unlocked). * * FL_FLOCK locks never deadlock, an existing lock is always removed before * upgrading from shared to exclusive (or vice versa). When this happens @@ -50,7 +50,7 @@ * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 * * Removed some race conditions in flock_lock_file(), marked other possible - * races. Just grep for FIXME to see them. + * races. Just grep for FIXME to see them. * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. * * Addressed Dmitry's concerns. Deadlock checking no longer recursive. @@ -112,6 +112,46 @@ * Leases and LOCK_MAND * Matthew Wilcox <willy@debian.org>, June, 2000. * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. + * + * Locking conflicts and dependencies: + * If multiple threads attempt to lock the same byte (or flock the same file) + * only one can be granted the lock, and other must wait their turn. + * The first lock has been "applied" or "granted", the others are "waiting" + * and are "blocked" by the "applied" lock.. + * + * Waiting and applied locks are all kept in trees whose properties are: + * + * - the root of a tree may be an applied or waiting lock. + * - every other node in the tree is a waiting lock that + * conflicts with every ancestor of that node. + * + * Every such tree begins life as a waiting singleton which obviously + * satisfies the above properties. + * + * The only ways we modify trees preserve these properties: + * + * 1. We may add a new leaf node, but only after first verifying that it + * conflicts with all of its ancestors. + * 2. We may remove the root of a tree, creating a new singleton + * tree from the root and N new trees rooted in the immediate + * children. + * 3. If the root of a tree is not currently an applied lock, we may + * apply it (if possible). + * 4. We may upgrade the root of the tree (either extend its range, + * or upgrade its entire range from read to write). + * + * When an applied lock is modified in a way that reduces or downgrades any + * part of its range, we remove all its children (2 above). This particularly + * happens when a lock is unlocked. + * + * For each of those child trees we "wake up" the thread which is + * waiting for the lock so it can continue handling as follows: if the + * root of the tree applies, we do so (3). If it doesn't, it must + * conflict with some applied lock. We remove (wake up) all of its children + * (2), and add it is a new leaf to the tree rooted in the applied + * lock (1). We then repeat the process recursively with those + * children. + * */ #include <linux/capability.h> @@ -189,9 +229,9 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * - * In addition, it also protects the fl->fl_block list, and the fl->fl_next - * pointer for file_lock structures that are acting as lock requests (in - * contrast to those that are acting as records of acquired locks). + * In addition, it also protects the fl->fl_blocked_requests list, and the + * fl->fl_blocker pointer for file_lock structures that are acting as lock + * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields @@ -293,7 +333,8 @@ static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_list); - INIT_LIST_HEAD(&fl->fl_block); + INIT_LIST_HEAD(&fl->fl_blocked_requests); + INIT_LIST_HEAD(&fl->fl_blocked_member); init_waitqueue_head(&fl->fl_wait); } @@ -332,7 +373,8 @@ void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(!list_empty(&fl->fl_list)); - BUG_ON(!list_empty(&fl->fl_block)); + BUG_ON(!list_empty(&fl->fl_blocked_requests)); + BUG_ON(!list_empty(&fl->fl_blocked_member)); BUG_ON(!hlist_unhashed(&fl->fl_link)); locks_release_private(fl); @@ -357,7 +399,6 @@ void locks_init_lock(struct file_lock *fl) memset(fl, 0, sizeof(struct file_lock)); locks_init_lock_heads(fl); } - EXPORT_SYMBOL(locks_init_lock); /* @@ -397,9 +438,26 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); } } - EXPORT_SYMBOL(locks_copy_lock); +static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) +{ + struct file_lock *f; + + /* + * As ctx->flc_lock is held, new requests cannot be added to + * ->fl_blocked_requests, so we don't need a lock to check if it + * is empty. + */ + if (list_empty(&fl->fl_blocked_requests)) + return; + spin_lock(&blocked_lock_lock); + list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests); + list_for_each_entry(f, &fl->fl_blocked_requests, fl_blocked_member) + f->fl_blocker = new; + spin_unlock(&blocked_lock_lock); +} + static inline int flock_translate_cmd(int cmd) { if (cmd & LOCK_MAND) return cmd & (LOCK_MAND | LOCK_RW); @@ -416,17 +474,20 @@ static inline int flock_translate_cmd(int cmd) { /* Fill in a file_lock structure with an appropriate FLOCK lock. */ static struct file_lock * -flock_make_lock(struct file *filp, unsigned int cmd) +flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl) { - struct file_lock *fl; int type = flock_translate_cmd(cmd); if (type < 0) return ERR_PTR(type); - - fl = locks_alloc_lock(); - if (fl == NULL) - return ERR_PTR(-ENOMEM); + + if (fl == NULL) { + fl = locks_alloc_lock(); + if (fl == NULL) + return ERR_PTR(-ENOMEM); + } else { + locks_init_lock(fl); + } fl->fl_file = filp; fl->fl_owner = filp; @@ -434,7 +495,7 @@ flock_make_lock(struct file *filp, unsigned int cmd) fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; - + return fl; } @@ -666,16 +727,58 @@ static void locks_delete_global_blocked(struct file_lock *waiter) static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); - list_del_init(&waiter->fl_block); - waiter->fl_next = NULL; + list_del_init(&waiter->fl_blocked_member); + waiter->fl_blocker = NULL; } -static void locks_delete_block(struct file_lock *waiter) +static void __locks_wake_up_blocks(struct file_lock *blocker) { + while (!list_empty(&blocker->fl_blocked_requests)) { + struct file_lock *waiter; + + waiter = list_first_entry(&blocker->fl_blocked_requests, + struct file_lock, fl_blocked_member); + __locks_delete_block(waiter); + if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) + waiter->fl_lmops->lm_notify(waiter); + else + wake_up(&waiter->fl_wait); + } +} + +/** + * locks_delete_lock - stop waiting for a file lock + * @waiter: the lock which was waiting + * + * lockd/nfsd need to disconnect the lock while working on it. + */ +int locks_delete_block(struct file_lock *waiter) +{ + int status = -ENOENT; + + /* + * If fl_blocker is NULL, it won't be set again as this thread + * "owns" the lock and is the only one that might try to claim + * the lock. So it is safe to test fl_blocker locklessly. + * Also if fl_blocker is NULL, this waiter is not listed on + * fl_blocked_requests for some lock, so no other request can + * be added to the list of fl_blocked_requests for this + * request. So if fl_blocker is NULL, it is safe to + * locklessly check if fl_blocked_requests is empty. If both + * of these checks succeed, there is no need to take the lock. + */ + if (waiter->fl_blocker == NULL && + list_empty(&waiter->fl_blocked_requests)) + return status; spin_lock(&blocked_lock_lock); + if (waiter->fl_blocker) + status = 0; + __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); spin_unlock(&blocked_lock_lock); + return status; } +EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in @@ -683,26 +786,49 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The - * fl_block list itself is protected by the blocked_lock_lock, but by ensuring - * that the flc_lock is also held on insertions we can avoid taking the - * blocked_lock_lock in some cases when we see that the fl_block list is empty. + * fl_blocked_requests list itself is protected by the blocked_lock_lock, + * but by ensuring that the flc_lock is also held on insertions we can avoid + * taking the blocked_lock_lock in some cases when we see that the + * fl_blocked_requests list is empty. + * + * Rather than just adding to the list, we check for conflicts with any existing + * waiters, and add beneath any waiter that blocks the new waiter. + * Thus wakeups don't happen until needed. */ static void __locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) + struct file_lock *waiter, + bool conflict(struct file_lock *, + struct file_lock *)) { - BUG_ON(!list_empty(&waiter->fl_block)); - waiter->fl_next = blocker; - list_add_tail(&waiter->fl_block, &blocker->fl_block); + struct file_lock *fl; + BUG_ON(!list_empty(&waiter->fl_blocked_member)); + +new_blocker: + list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member) + if (conflict(fl, waiter)) { + blocker = fl; + goto new_blocker; + } + waiter->fl_blocker = blocker; + list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); + + /* The requests in waiter->fl_blocked are known to conflict with + * waiter, but might not conflict with blocker, or the requests + * and lock which block it. So they all need to be woken. + */ + __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) + struct file_lock *waiter, + bool conflict(struct file_lock *, + struct file_lock *)) { spin_lock(&blocked_lock_lock); - __locks_insert_block(blocker, waiter); + __locks_insert_block(blocker, waiter, conflict); spin_unlock(&blocked_lock_lock); } @@ -716,25 +842,15 @@ static void locks_wake_up_blocks(struct file_lock *blocker) /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and - * the flc_lock is always held here. Note that removal from the fl_block - * list does not require the flc_lock, so we must recheck list_empty() - * after acquiring the blocked_lock_lock. + * the flc_lock is always held here. Note that removal from the + * fl_blocked_requests list does not require the flc_lock, so we must + * recheck list_empty() after acquiring the blocked_lock_lock. */ - if (list_empty(&blocker->fl_block)) + if (list_empty(&blocker->fl_blocked_requests)) return; spin_lock(&blocked_lock_lock); - while (!list_empty(&blocker->fl_block)) { - struct file_lock *waiter; - - waiter = list_first_entry(&blocker->fl_block, - struct file_lock, fl_block); - __locks_delete_block(waiter); - if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) - waiter->fl_lmops->lm_notify(waiter); - else - wake_up(&waiter->fl_wait); - } + __locks_wake_up_blocks(blocker); spin_unlock(&blocked_lock_lock); } @@ -766,47 +882,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose) /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { if (sys_fl->fl_type == F_WRLCK) - return 1; + return true; if (caller_fl->fl_type == F_WRLCK) - return 1; - return 0; + return true; + return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ -static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool posix_locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_fl, sys_fl)) - return (0); + return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). */ -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool flock_locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_fl->fl_file == sys_fl->fl_file) - return (0); + return false; if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } void @@ -877,8 +996,11 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) struct file_lock *fl; hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { - if (posix_same_owner(fl, block_fl)) - return fl->fl_next; + if (posix_same_owner(fl, block_fl)) { + while (fl->fl_blocker) + fl = fl->fl_blocker; + return fl; + } } return NULL; } @@ -965,12 +1087,13 @@ find_conflict: if (!(request->fl_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; - locks_insert_block(fl, request); + locks_insert_block(fl, request, flock_locks_conflict); goto out; } if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, &ctx->flc_flock); new_fl = NULL; error = 0; @@ -1039,12 +1162,13 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, spin_lock(&blocked_lock_lock); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; - __locks_insert_block(fl, request); + __locks_insert_block(fl, request, + posix_locks_conflict); } spin_unlock(&blocked_lock_lock); goto out; - } - } + } + } /* If we're just looking for a conflict, we're done. */ error = 0; @@ -1164,6 +1288,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, goto out; } locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, &fl->fl_list); fl = new_fl; new_fl = NULL; @@ -1237,13 +1362,11 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); - if (!error) - continue; - - locks_delete_block(fl); - break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + if (error) + break; } + locks_delete_block(fl); return error; } @@ -1324,7 +1447,7 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); + error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); if (!error) { /* * If we've been sleeping someone might have @@ -1334,13 +1457,12 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, continue; } - locks_delete_block(&fl); break; } + locks_delete_block(&fl); return error; } - EXPORT_SYMBOL(locks_mandatory_area); #endif /* CONFIG_MANDATORY_FILE_LOCKING */ @@ -1511,14 +1633,14 @@ restart: break_time -= jiffies; if (break_time == 0) break_time++; - locks_insert_block(fl, new_fl); + locks_insert_block(fl, new_fl, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(&ctx->flc_lock); percpu_up_read_preempt_enable(&file_rwsem); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, - !new_fl->fl_next, break_time); + !new_fl->fl_blocker, break_time); percpu_down_read_preempt_disable(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1542,7 +1664,6 @@ out: locks_free_lock(new_fl); return error; } - EXPORT_SYMBOL(__break_lease); /** @@ -1573,7 +1694,6 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) if (has_lease) *time = current_time(inode); } - EXPORT_SYMBOL(lease_get_mtime); /** @@ -1628,8 +1748,8 @@ int fcntl_getlease(struct file *filp) /** * check_conflicting_open - see if the given dentry points to a file that has - * an existing open that would conflict with the - * desired lease. + * an existing open that would conflict with the + * desired lease. * @dentry: dentry to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags @@ -1646,7 +1766,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) + if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) return -EAGAIN; if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || @@ -1853,7 +1973,7 @@ EXPORT_SYMBOL(generic_setlease); * @arg: type of lease to obtain * @lease: file_lock to use when adding a lease * @priv: private info for lm_setup when adding a lease (may be - * NULL if lm_setup doesn't require it) + * NULL if lm_setup doesn't require it) * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter @@ -1931,13 +2051,11 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); - if (!error) - continue; - - locks_delete_block(fl); - break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + if (error) + break; } + locks_delete_block(fl); return error; } @@ -2001,7 +2119,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; - lock = flock_make_lock(f.file, cmd); + lock = flock_make_lock(f.file, cmd, NULL); if (IS_ERR(lock)) { error = PTR_ERR(lock); goto out_putf; @@ -2143,7 +2261,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock) error = vfs_test_lock(filp, fl); if (error) goto out; - + flock->l_type = fl->fl_type; if (fl->fl_type != F_UNLCK) { error = posix_lock_to_flock(flock, fl); @@ -2210,13 +2328,11 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); - if (!error) - continue; - - locks_delete_block(fl); - break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + if (error) + break; } + locks_delete_block(fl); return error; } @@ -2476,6 +2592,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) if (!ctx || list_empty(&ctx->flc_posix)) return; + locks_init_lock(&lock); lock.fl_type = F_UNLCK; lock.fl_flags = FL_POSIX | FL_CLOSE; lock.fl_start = 0; @@ -2492,26 +2609,21 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) lock.fl_ops->fl_release_private(&lock); trace_locks_remove_posix(inode, &lock, error); } - EXPORT_SYMBOL(locks_remove_posix); /* The i_flctx must be valid when calling into here */ static void locks_remove_flock(struct file *filp, struct file_lock_context *flctx) { - struct file_lock fl = { - .fl_owner = filp, - .fl_pid = current->tgid, - .fl_file = filp, - .fl_flags = FL_FLOCK | FL_CLOSE, - .fl_type = F_UNLCK, - .fl_end = OFFSET_MAX, - }; + struct file_lock fl; struct inode *inode = locks_inode(filp); if (list_empty(&flctx->flc_flock)) return; + flock_make_lock(filp, LOCK_UN, &fl); + fl.fl_flags |= FL_CLOSE; + if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else @@ -2570,27 +2682,6 @@ void locks_remove_file(struct file *filp) } /** - * posix_unblock_lock - stop waiting for a file lock - * @waiter: the lock which was waiting - * - * lockd needs to block waiting for locks. - */ -int -posix_unblock_lock(struct file_lock *waiter) -{ - int status = 0; - - spin_lock(&blocked_lock_lock); - if (waiter->fl_next) - __locks_delete_block(waiter); - else - status = -ENOENT; - spin_unlock(&blocked_lock_lock); - return status; -} -EXPORT_SYMBOL(posix_unblock_lock); - -/** * vfs_cancel_lock - file byte range unblock lock * @filp: The file to apply the unblock to * @fl: The lock to be unblocked @@ -2603,7 +2694,6 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } - EXPORT_SYMBOL_GPL(vfs_cancel_lock); #ifdef CONFIG_PROC_FS @@ -2707,7 +2797,7 @@ static int locks_show(struct seq_file *f, void *v) lock_get_status(f, fl, iter->li_pos, ""); - list_for_each_entry(bfl, &fl->fl_block, fl_block) + list_for_each_entry(bfl, &fl->fl_blocked_requests, fl_blocked_member) lock_get_status(f, bfl, iter->li_pos, " ->"); return 0; @@ -2803,7 +2893,6 @@ static int __init filelock_init(void) filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); - for_each_possible_cpu(i) { struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i); @@ -2813,5 +2902,4 @@ static int __init filelock_init(void) return 0; } - core_initcall(filelock_init); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c @@ -6311,7 +6311,8 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); p->l_ctx = nfs_get_lock_context(ctx); - memcpy(&p->fl, fl, sizeof(p->fl)); + locks_init_lock(&p->fl); + locks_copy_lock(&p->fl, fl); p->server = NFS_SERVER(inode); return p; } @@ -6533,7 +6534,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; refcount_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - memcpy(&p->fl, fl, sizeof(p->fl)); + locks_init_lock(&p->fl); + locks_copy_lock(&p->fl, fl); return p; out_free_seqid: nfs_free_seqid(p->arg.open_seqid); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c @@ -238,7 +238,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, } spin_unlock(&nn->blocked_locks_lock); if (found) - posix_unblock_lock(&found->nbl_lock); + locks_delete_block(&found->nbl_lock); return found; } @@ -293,7 +293,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); + locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } } @@ -4863,7 +4863,7 @@ nfs4_laundromat(struct nfsd_net *nn) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); + locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } out: diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c @@ -52,6 +52,7 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, if (lockres->l_flags & OCFS2_LOCK_ATTACHED && lockres->l_level > LKM_NLMODE) { int old_level = 0; + struct file_lock request; if (lockres->l_level == LKM_EXMODE) old_level = 1; @@ -66,11 +67,10 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, * level. */ - locks_lock_file_wait(file, - &(struct file_lock) { - .fl_type = F_UNLCK, - .fl_flags = FL_FLOCK - }); + locks_init_lock(&request); + request.fl_type = F_UNLCK; + request.fl_flags = FL_FLOCK; + locks_lock_file_wait(file, &request); ocfs2_file_unlock(file); } diff --git a/include/linux/fs.h b/include/linux/fs.h @@ -1044,10 +1044,15 @@ bool opens_in_grace(struct net *); * Obviously, the last two criteria only matter for POSIX locks. */ struct file_lock { - struct file_lock *fl_next; /* singly linked list for this inode */ + struct file_lock *fl_blocker; /* The lock, that is blocking us */ struct list_head fl_list; /* link into file_lock_context */ struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_block; /* circular list of blocked processes */ + struct list_head fl_blocked_requests; /* list of requests with + * ->fl_blocker pointing here + */ + struct list_head fl_blocked_member; /* node in + * ->fl_blocker->fl_blocked_requests + */ fl_owner_t fl_owner; unsigned int fl_flags; unsigned char fl_type; @@ -1119,7 +1124,7 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_unblock_lock(struct file_lock *); +extern int locks_delete_block(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); @@ -1209,7 +1214,7 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } -static inline int posix_unblock_lock(struct file_lock *waiter) +static inline int locks_delete_block(struct file_lock *waiter) { return -ENOENT; } diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(filelock_lock, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, fl_next) + __field(struct file_lock *, fl_blocker) __field(fl_owner_t, fl_owner) __field(unsigned int, fl_pid) __field(unsigned int, fl_flags) @@ -82,7 +82,7 @@ DECLARE_EVENT_CLASS(filelock_lock, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->fl_next = fl ? fl->fl_next : NULL; + __entry->fl_blocker = fl ? fl->fl_blocker : NULL; __entry->fl_owner = fl ? fl->fl_owner : NULL; __entry->fl_pid = fl ? fl->fl_pid : 0; __entry->fl_flags = fl ? fl->fl_flags : 0; @@ -92,9 +92,9 @@ DECLARE_EVENT_CLASS(filelock_lock, __entry->ret = ret; ), - TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_blocker=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), - __entry->i_ino, __entry->fl_next, __entry->fl_owner, + __entry->i_ino, __entry->fl_blocker, __entry->fl_owner, __entry->fl_pid, show_fl_flags(__entry->fl_flags), show_fl_type(__entry->fl_type), __entry->fl_start, __entry->fl_end, __entry->ret) @@ -125,7 +125,7 @@ DECLARE_EVENT_CLASS(filelock_lease, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, fl_next) + __field(struct file_lock *, fl_blocker) __field(fl_owner_t, fl_owner) __field(unsigned int, fl_flags) __field(unsigned char, fl_type) @@ -137,7 +137,7 @@ DECLARE_EVENT_CLASS(filelock_lease, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->fl_next = fl ? fl->fl_next : NULL; + __entry->fl_blocker = fl ? fl->fl_blocker : NULL; __entry->fl_owner = fl ? fl->fl_owner : NULL; __entry->fl_flags = fl ? fl->fl_flags : 0; __entry->fl_type = fl ? fl->fl_type : 0; @@ -145,9 +145,9 @@ DECLARE_EVENT_CLASS(filelock_lease, __entry->fl_downgrade_time = fl ? fl->fl_downgrade_time : 0; ), - TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu", + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_blocker=0x%p fl_owner=0x%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), - __entry->i_ino, __entry->fl_next, __entry->fl_owner, + __entry->i_ino, __entry->fl_blocker, __entry->fl_owner, show_fl_flags(__entry->fl_flags), show_fl_type(__entry->fl_type), __entry->fl_break_time, __entry->fl_downgrade_time)