whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 79257514f532bd6e500267a061ae2cae4e3d28d5
parent 7da4221b530f0427cc09bdaa5c5c1bd86d30583d
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 29 Oct 2018 09:19:53 -0700

Merge tag 'for_v4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "Amir's patches to implement superblock fanotify watches, Xiaoming's
  patch to enable reporting of thread IDs in fanotify events instead of
  TGIDs (sadly the patch got mis-attributed to Amir and I've noticed
  only now), and a fix of possible oops on umount caused by fsnotify
  infrastructure"

* tag 'for_v4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: Fix busy inodes during unmount
  fs: group frequently accessed fields of struct super_block together
  fanotify: support reporting thread id instead of process id
  fanotify: add BUILD_BUG_ON() to count the bits of fanotify constants
  fsnotify: convert runtime BUG_ON() to BUILD_BUG_ON()
  fanotify: deprecate uapi FAN_ALL_* constants
  fanotify: simplify handling of FAN_ONDIR
  fsnotify: generalize handling of extra event flags
  fanotify: fix collision of internal and uapi mark flags
  fanotify: store fanotify_init() flags in group's fanotify_data
  fanotify: add API to attach/detach super block mark
  fsnotify: send path type events to group with super block marks
  fsnotify: add super block object type

Diffstat:
Mfs/notify/fanotify/fanotify.c | 17+++++++++++------
Mfs/notify/fanotify/fanotify.h | 4++--
Mfs/notify/fanotify/fanotify_user.c | 103++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mfs/notify/fdinfo.c | 29++++++-----------------------
Mfs/notify/fsnotify.c | 42+++++++++++++++++++++++++++---------------
Mfs/notify/fsnotify.h | 11+++++++++++
Mfs/notify/inotify/inotify_user.c | 2+-
Mfs/notify/mark.c | 43+++++++++++++++++++++++++++++++++++--------
Mfs/super.c | 2+-
Minclude/linux/fanotify.h | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Minclude/linux/fs.h | 22+++++++++++++++++-----
Minclude/linux/fsnotify_backend.h | 30+++++++++++++++++++++++-------
Minclude/uapi/linux/fanotify.h | 25++++++++++++++++++-------
13 files changed, 267 insertions(+), 122 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c @@ -25,7 +25,7 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid && + if (old_fsn->inode == new_fsn->inode && old->pid == new->pid && old->path.mnt == new->path.mnt && old->path.dentry == new->path.dentry) return true; @@ -131,8 +131,8 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return false; - if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask & - ~marks_ignored_mask) + if (event_mask & FANOTIFY_OUTGOING_EVENTS & + marks_mask & ~marks_ignored_mask) return true; return false; @@ -171,7 +171,10 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, goto out; init: __maybe_unused fsnotify_init_event(&event->fse, inode, mask); - event->tgid = get_pid(task_tgid(current)); + if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) + event->pid = get_pid(task_pid(current)); + else + event->pid = get_pid(task_tgid(current)); if (path) { event->path = *path; path_get(&event->path); @@ -205,6 +208,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10); + if (!fanotify_should_send_event(iter_info, mask, data, data_type)) return 0; @@ -236,7 +241,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ - BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); + BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); @@ -268,7 +273,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); - put_pid(event->tgid); + put_pid(event->pid); if (fanotify_is_perm_event(fsn_event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h @@ -19,7 +19,7 @@ struct fanotify_event_info { * during this object's lifetime */ struct path path; - struct pid *tgid; + struct pid *pid; }; /* @@ -44,7 +44,7 @@ FANOTIFY_PE(struct fsnotify_event *fse) static inline bool fanotify_is_perm_event(u32 mask) { return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) && - mask & FAN_ALL_PERM_EVENTS; + mask & FANOTIFY_PERM_EVENTS; } static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c @@ -131,8 +131,8 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; - metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS; - metadata->pid = pid_vnr(event->tgid); + metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata->pid = pid_vnr(event->pid); if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { @@ -191,7 +191,7 @@ static int process_access_response(struct fsnotify_group *group, if (fd < 0) return -EINVAL; - if ((response & FAN_AUDIT) && !group->fanotify_data.audit) + if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) return -EINVAL; event = dequeue_event(group, fd); @@ -395,7 +395,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) { + if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); @@ -506,18 +506,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask & ~mask; - - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask &= ~mask; } else { - __u32 tmask = fsn_mark->ignored_mask & ~mask; - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask &= ~mask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -563,6 +555,13 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, mask, flags); } +static int fanotify_remove_sb_mark(struct fsnotify_group *group, + struct super_block *sb, __u32 mask, + unsigned int flags) +{ + return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags); +} + static int fanotify_remove_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) @@ -579,19 +578,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask | mask; - - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask |= mask; } else { - __u32 tmask = fsn_mark->ignored_mask | mask; - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask |= mask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -658,6 +648,14 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); } +static int fanotify_add_sb_mark(struct fsnotify_group *group, + struct super_block *sb, __u32 mask, + unsigned int flags) +{ + return fanotify_add_mark(group, &sb->s_fsnotify_marks, + FSNOTIFY_OBJ_TYPE_SB, mask, flags); +} + static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) @@ -686,16 +684,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct user_struct *user; struct fanotify_event_info *oevent; - pr_debug("%s: flags=%d event_f_flags=%d\n", - __func__, flags, event_f_flags); + pr_debug("%s: flags=%x event_f_flags=%x\n", + __func__, flags, event_f_flags); if (!capable(CAP_SYS_ADMIN)) return -EPERM; #ifdef CONFIG_AUDITSYSCALL - if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT)) + if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) #else - if (flags & ~FAN_ALL_INIT_FLAGS) + if (flags & ~FANOTIFY_INIT_FLAGS) #endif return -EINVAL; @@ -731,6 +729,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } group->fanotify_data.user = user; + group->fanotify_data.flags = flags; atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); @@ -746,7 +745,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - switch (flags & FAN_ALL_CLASS_BITS) { + switch (flags & FANOTIFY_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; break; @@ -783,7 +782,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) goto out_destroy_group; - group->fanotify_data.audit = true; } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); @@ -805,7 +803,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; + u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -815,8 +814,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & ((__u64)0xffffffff << 32)) return -EINVAL; - if (flags & ~FAN_ALL_MARK_FLAGS) + if (flags & ~FANOTIFY_MARK_FLAGS) + return -EINVAL; + + switch (mark_type) { + case FAN_MARK_INODE: + case FAN_MARK_MOUNT: + case FAN_MARK_FILESYSTEM: + break; + default: return -EINVAL; + } + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { case FAN_MARK_ADD: /* fallthrough */ case FAN_MARK_REMOVE: @@ -824,20 +833,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; break; case FAN_MARK_FLUSH: - if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH)) + if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) return -EINVAL; break; default: return -EINVAL; } - if (mask & FAN_ONDIR) { - flags |= FAN_MARK_ONDIR; - mask &= ~FAN_ONDIR; - } - if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) - valid_mask |= FAN_ALL_PERM_EVENTS; + valid_mask |= FANOTIFY_PERM_EVENTS; if (mask & ~valid_mask) return -EINVAL; @@ -857,14 +861,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * allowed to set permissions events. */ ret = -EINVAL; - if (mask & FAN_ALL_PERM_EVENTS && + if (mask & FANOTIFY_PERM_EVENTS && group->priority == FS_PRIO_0) goto fput_and_out; if (flags & FAN_MARK_FLUSH) { ret = 0; - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); + else if (mark_type == FAN_MARK_FILESYSTEM) + fsnotify_clear_sb_marks_by_group(group); else fsnotify_clear_inode_marks_by_group(group); goto fput_and_out; @@ -875,7 +881,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; /* inode held in place by reference to path; group by fget on fd */ - if (!(flags & FAN_MARK_MOUNT)) + if (mark_type == FAN_MARK_INODE) inode = path.dentry->d_inode; else mnt = path.mnt; @@ -883,14 +889,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); + else if (mark_type == FAN_MARK_FILESYSTEM) + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags); else ret = fanotify_add_inode_mark(group, inode, mask, flags); break; case FAN_MARK_REMOVE: - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); + else if (mark_type == FAN_MARK_FILESYSTEM) + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags); else ret = fanotify_remove_inode_mark(group, inode, mask, flags); break; @@ -934,6 +944,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c @@ -131,37 +131,20 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) { + struct super_block *sb = fsnotify_conn_sb(mark->connector); + + seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", + sb->s_dev, mflags, mark->mask, mark->ignored_mask); } } void fanotify_show_fdinfo(struct seq_file *m, struct file *f) { struct fsnotify_group *group = f->private_data; - unsigned int flags = 0; - - switch (group->priority) { - case FS_PRIO_0: - flags |= FAN_CLASS_NOTIF; - break; - case FS_PRIO_1: - flags |= FAN_CLASS_CONTENT; - break; - case FS_PRIO_2: - flags |= FAN_CLASS_PRE_CONTENT; - break; - } - - if (group->max_events == UINT_MAX) - flags |= FAN_UNLIMITED_QUEUE; - - if (group->fanotify_data.max_marks == UINT_MAX) - flags |= FAN_UNLIMITED_MARKS; - - if (group->fanotify_data.audit) - flags |= FAN_ENABLE_AUDIT; seq_printf(m, "fanotify flags:%x event-flags:%x\n", - flags, group->fanotify_data.f_flags); + group->fanotify_data.flags, group->fanotify_data.f_flags); show_fdinfo(m, f, fanotify_fdinfo); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c @@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ -void fsnotify_unmount_inodes(struct super_block *sb) +static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; @@ -96,6 +96,15 @@ void fsnotify_unmount_inodes(struct super_block *sb) if (iput_inode) iput(iput_inode); + /* Wait for outstanding inode references from connectors */ + wait_var_event(&sb->s_fsnotify_inode_refs, + !atomic_long_read(&sb->s_fsnotify_inode_refs)); +} + +void fsnotify_sb_delete(struct super_block *sb) +{ + fsnotify_unmount_inodes(sb); + fsnotify_clear_marks_by_sb(sb); } /* @@ -190,7 +199,7 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; struct fsnotify_mark *mark; @@ -319,15 +328,17 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { struct fsnotify_iter_info iter_info = {}; - struct mount *mnt; + struct super_block *sb = NULL; + struct mount *mnt = NULL; + __u32 mnt_or_sb_mask = 0; int ret = 0; - /* global tests shouldn't care about events on child only the specific event */ - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); - if (data_is == FSNOTIFY_EVENT_PATH) + if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); - else - mnt = NULL; + sb = mnt->mnt.mnt_sb; + mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; + } /* * Optimization: srcu_read_lock() has a memory barrier which can @@ -337,16 +348,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * need SRCU to keep them "alive". */ if (!to_tell->i_fsnotify_marks && - (!mnt || !mnt->mnt_fsnotify_marks)) + (!mnt || (!mnt->mnt_fsnotify_marks && !sb->s_fsnotify_marks))) return 0; /* * if this is a modify event we may need to clear the ignored masks - * otherwise return if neither the inode nor the vfsmount care about + * otherwise return if neither the inode nor the vfsmount/sb care about * this type of event. */ if (!(mask & FS_MODIFY) && - !(test_mask & to_tell->i_fsnotify_mask) && - !(mnt && test_mask & mnt->mnt_fsnotify_mask)) + !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask))) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); @@ -356,11 +366,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); + iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = + fsnotify_first_mark(&sb->s_fsnotify_marks); } /* - * We need to merge inode & vfsmount mark lists so that inode mark - * ignore masks are properly reflected for mount mark notifications. + * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark + * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { @@ -386,7 +398,7 @@ static __init int fsnotify_init(void) { int ret; - BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h @@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount( return container_of(conn->obj, struct mount, mnt_fsnotify_marks); } +static inline struct super_block *fsnotify_conn_sb( + struct fsnotify_mark_connector *conn) +{ + return container_of(conn->obj, struct super_block, s_fsnotify_marks); +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } +/* run the list of all marks associated with sb and destroy them */ +static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) +{ + fsnotify_destroy_marks(&sb->s_fsnotify_marks); +} /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c @@ -815,7 +815,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); + BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC|SLAB_ACCOUNT); diff --git a/fs/notify/mark.c b/fs/notify/mark.c @@ -115,6 +115,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_inode(conn)->i_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) + return &fsnotify_conn_sb(conn)->s_fsnotify_mask; return NULL; } @@ -179,19 +181,24 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_connector_from_object( - struct fsnotify_mark_connector *conn) +static void *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn, + unsigned int *type) { struct inode *inode = NULL; + *type = conn->type; if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) return NULL; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs); } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { + fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } rcu_assign_pointer(*(conn->obj), NULL); @@ -211,10 +218,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) fsnotify_put_group(group); } +/* Drop object reference originally held by a connector */ +static void fsnotify_drop_object(unsigned int type, void *objp) +{ + struct inode *inode; + struct super_block *sb; + + if (!objp) + return; + /* Currently only inode references are passed to be dropped */ + if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE)) + return; + inode = objp; + sb = inode->i_sb; + iput(inode); + if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs)) + wake_up_var(&sb->s_fsnotify_inode_refs); +} + void fsnotify_put_mark(struct fsnotify_mark *mark) { struct fsnotify_mark_connector *conn; - struct inode *inode = NULL; + void *objp = NULL; + unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ @@ -234,7 +260,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) conn = mark->connector; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { - inode = fsnotify_detach_connector_from_object(conn); + objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { __fsnotify_recalc_mask(conn); @@ -242,7 +268,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) mark->connector = NULL; spin_unlock(&conn->lock); - iput(inode); + fsnotify_drop_object(type, objp); if (free_conn) { spin_lock(&destroy_lock); @@ -709,7 +735,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark, *old_mark = NULL; - struct inode *inode; + void *objp; + unsigned int type; conn = fsnotify_grab_connector(connp); if (!conn) @@ -735,11 +762,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp) * mark references get dropped. It would lead to strange results such * as delaying inode deletion or blocking unmount. */ - inode = fsnotify_detach_connector_from_object(conn); + objp = fsnotify_detach_connector_from_object(conn, &type); spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); - iput(inode); + fsnotify_drop_object(type, objp); } /* diff --git a/fs/super.c b/fs/super.c @@ -442,7 +442,7 @@ void generic_shutdown_super(struct super_block *sb) sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; - fsnotify_unmount_inodes(sb); + fsnotify_sb_delete(sb); cgroup_writeback_umount(); evict_inodes(sb); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h @@ -4,6 +4,61 @@ #include <uapi/linux/fanotify.h> -/* not valid from userspace, only kernel internal */ -#define FAN_MARK_ONDIR 0x00000100 +#define FAN_GROUP_FLAG(group, flag) \ + ((group)->fanotify_data.flags & (flag)) + +/* + * Flags allowed to be passed from/to userspace. + * + * We intentionally do not add new bits to the old FAN_ALL_* constants, because + * they are uapi exposed constants. If there are programs out there using + * these constant, the programs may break if re-compiled with new uapi headers + * and then run on an old kernel. + */ +#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ + FAN_REPORT_TID | \ + FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) + +#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ + FAN_MARK_FILESYSTEM) + +#define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ + FAN_MARK_ADD | \ + FAN_MARK_REMOVE | \ + FAN_MARK_DONT_FOLLOW | \ + FAN_MARK_ONLYDIR | \ + FAN_MARK_IGNORED_MASK | \ + FAN_MARK_IGNORED_SURV_MODIFY | \ + FAN_MARK_FLUSH) + +/* Events that user can request to be notified on */ +#define FANOTIFY_EVENTS (FAN_ACCESS | FAN_MODIFY | \ + FAN_CLOSE | FAN_OPEN) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) + +/* Extra flags that may be reported with event or control handling of events */ +#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) + +/* Events that may be reported to user */ +#define FANOTIFY_OUTGOING_EVENTS (FANOTIFY_EVENTS | \ + FANOTIFY_PERM_EVENTS | \ + FAN_Q_OVERFLOW) + +#define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ + FANOTIFY_EVENT_FLAGS) + +/* Do not use these old uapi constants internally */ +#undef FAN_ALL_CLASS_BITS +#undef FAN_ALL_INIT_FLAGS +#undef FAN_ALL_MARK_FLAGS +#undef FAN_ALL_EVENTS +#undef FAN_ALL_PERM_EVENTS +#undef FAN_ALL_OUTGOING_EVENTS + #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h @@ -1412,17 +1412,26 @@ struct super_block { struct sb_writers s_writers; + /* + * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and + * s_fsnotify_marks together for cache efficiency. They are frequently + * accessed and rarely modified. + */ + void *s_fs_info; /* Filesystem private info */ + + /* Granularity of c/m/atime in ns (cannot be worse than a second) */ + u32 s_time_gran; +#ifdef CONFIG_FSNOTIFY + __u32 s_fsnotify_mask; + struct fsnotify_mark_connector __rcu *s_fsnotify_marks; +#endif + char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ - void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; fmode_t s_mode; - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. @@ -1447,6 +1456,9 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; + /* Pending fsnotify inode refs */ + atomic_long_t s_fsnotify_inode_refs; + /* Being remounted read-only */ int s_readonly_remount; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h @@ -68,15 +68,20 @@ #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) +/* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \ - FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ + FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME) + +/* Extra flags that may be reported with event or control handling of events */ +#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) +#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; @@ -189,10 +194,10 @@ struct fsnotify_group { /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; - int f_flags; + int flags; /* flags from fanotify_init() */ + int f_flags; /* event_f_flags from fanotify_init() */ unsigned int max_marks; struct user_struct *user; - bool audit; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; @@ -206,12 +211,14 @@ struct fsnotify_group { enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, + FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) +#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) static inline bool fsnotify_valid_obj_type(unsigned int type) @@ -255,6 +262,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) +FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_obj_type(type) \ for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) @@ -267,8 +275,8 @@ struct fsnotify_mark_connector; typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; /* - * Inode / vfsmount point to this structure which tracks all marks attached to - * the inode / vfsmount. The reference to inode / vfsmount is held by this + * Inode/vfsmount/sb point to this structure which tracks all marks attached to + * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached * to it. The structure is protected by fsnotify_mark_srcu. */ @@ -335,6 +343,7 @@ extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int dat extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); +extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline int fsnotify_inode_watches_children(struct inode *inode) @@ -455,9 +464,13 @@ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *gr { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); } +/* run all the marks in a group, and clear all of the sn marks */ +static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); +} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); -extern void fsnotify_unmount_inodes(struct super_block *sb); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); @@ -484,6 +497,9 @@ static inline void __fsnotify_inode_delete(struct inode *inode) static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} +static inline void fsnotify_sb_delete(struct super_block *sb) +{} + static inline void fsnotify_update_flags(struct dentry *dentry) {} diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h @@ -27,10 +27,12 @@ #define FAN_CLOEXEC 0x00000001 #define FAN_NONBLOCK 0x00000002 -/* These are NOT bitwise flags. Both bits are used togther. */ +/* These are NOT bitwise flags. Both bits are used together. */ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) @@ -38,6 +40,10 @@ #define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ENABLE_AUDIT 0x00000040 +/* Flags to determine fanotify event format */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ + +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ FAN_UNLIMITED_MARKS) @@ -47,11 +53,18 @@ #define FAN_MARK_REMOVE 0x00000002 #define FAN_MARK_DONT_FOLLOW 0x00000004 #define FAN_MARK_ONLYDIR 0x00000008 -#define FAN_MARK_MOUNT 0x00000010 +/* FAN_MARK_MOUNT is 0x00000010 */ #define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 + +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ @@ -61,11 +74,7 @@ FAN_MARK_IGNORED_SURV_MODIFY |\ FAN_MARK_FLUSH) -/* - * All of the events - we build the list by hand so that we can add flags in - * the future and not break backward compatibility. Apps will get only the - * events that they originally wanted. Be sure to add new events here! - */ +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_EVENTS (FAN_ACCESS |\ FAN_MODIFY |\ FAN_CLOSE |\ @@ -74,9 +83,11 @@ /* * All events which require a permission response from userspace */ +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ FAN_ACCESS_PERM) +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW)