whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 0556161ff9069c938ca5409e1e102ac6f371a1c8
parent a9913f23f39f4aa74956587a03e78b758a10c314
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu,  7 Mar 2019 09:03:38 -0800

Merge tag 'fsnotify_for_v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fanotify updates from Jan Kara:
 "Support for fanotify directory events and changes to make waiting for
  fanotify permission event response killable"

* tag 'fsnotify_for_v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (25 commits)
  fanotify: Make waits for fanotify events only killable
  fanotify: Use interruptible wait when waiting for permission events
  fanotify: Track permission event state
  fanotify: Simplify cleaning of access_list
  fsnotify: Create function to remove event from notification list
  fanotify: Move locking inside get_one_event()
  fanotify: Fold dequeue_event() into process_access_response()
  fanotify: Select EXPORTFS
  fanotify: report FAN_ONDIR to listener with FAN_REPORT_FID
  fanotify: add support for create/attrib/move/delete events
  fanotify: support events with data type FSNOTIFY_EVENT_INODE
  fanotify: check FS_ISDIR flag instead of d_is_dir()
  fsnotify: report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events
  fanotify: use vfs_get_fsid() helper instead of vfs_statfs()
  vfs: add vfs_get_fsid() helper
  fanotify: cache fsid in fsnotify_mark_connector
  fanotify: enable FAN_REPORT_FID init flag
  fanotify: copy event fid info to user
  fanotify: encode file identifier for FAN_REPORT_FID
  fanotify: open code fill_event_metadata()
  ...

Diffstat:
Mfs/notify/fanotify/Kconfig | 1+
Mfs/notify/fanotify/fanotify.c | 267++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Mfs/notify/fanotify/fanotify.h | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mfs/notify/fanotify/fanotify_user.c | 373++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mfs/notify/fsnotify.c | 15+++++++--------
Mfs/notify/inotify/inotify.h | 1+
Mfs/notify/inotify/inotify_fsnotify.c | 18++++++++++++++----
Mfs/notify/inotify/inotify_user.c | 5+++--
Mfs/notify/mark.c | 42+++++++++++++++++++++++++++++++++++-------
Mfs/notify/notification.c | 42++++++++++++++----------------------------
Mfs/statfs.c | 14++++++++++++++
Minclude/linux/fanotify.h | 26++++++++++++++++++++++----
Minclude/linux/fsnotify.h | 73++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Minclude/linux/fsnotify_backend.h | 67+++++++++++++++++++++++++++++++++++++++++++------------------------
Minclude/linux/statfs.h | 3+++
Minclude/uapi/linux/fanotify.h | 29+++++++++++++++++++++++++++++
16 files changed, 828 insertions(+), 264 deletions(-)

diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig @@ -2,6 +2,7 @@ config FANOTIFY bool "Filesystem wide access notification" select FSNOTIFY select ANON_INODES + select EXPORTFS default n ---help--- Say Y here to enable fanotify support. fanotify is a file access diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c @@ -13,22 +13,40 @@ #include <linux/wait.h> #include <linux/audit.h> #include <linux/sched/mm.h> +#include <linux/statfs.h> #include "fanotify.h" static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { - struct fanotify_event_info *old, *new; + struct fanotify_event *old, *new; pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode == new_fsn->inode && old->pid == new->pid && - old->path.mnt == new->path.mnt && - old->path.dentry == new->path.dentry) - return true; + if (old_fsn->inode != new_fsn->inode || old->pid != new->pid || + old->fh_type != new->fh_type || old->fh_len != new->fh_len) + return false; + + if (fanotify_event_has_path(old)) { + return old->path.mnt == new->path.mnt && + old->path.dentry == new->path.dentry; + } else if (fanotify_event_has_fid(old)) { + /* + * We want to merge many dirent events in the same dir (i.e. + * creates/unlinks/renames), but we do not want to merge dirent + * events referring to subdirs with dirent events referring to + * non subdirs, otherwise, user won't be able to tell from a + * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ + * unlink pair or rmdir+create pair of events. + */ + return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) && + fanotify_fid_equal(&old->fid, &new->fid, old->fh_len); + } + + /* Do not merge events if we failed to encode fid */ return false; } @@ -36,20 +54,22 @@ static bool should_merge(struct fsnotify_event *old_fsn, static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; + struct fanotify_event *new; pr_debug("%s: list=%p event=%p\n", __func__, list, event); + new = FANOTIFY_E(event); /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. */ - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(new->mask)) return 0; list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { - test_event->mask |= event->mask; + FANOTIFY_E(test_event)->mask |= new->mask; return 1; } } @@ -57,15 +77,44 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) return 0; } +/* + * Wait for response to permission event. The function also takes care of + * freeing the permission event (or offloads that in case the wait is canceled + * by a signal). The function returns 0 in case access got allowed by userspace, + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case + * the wait got interrupted by a signal. + */ static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event, + struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { int ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response); + ret = wait_event_killable(group->fanotify_data.access_waitq, + event->state == FAN_EVENT_ANSWERED); + /* Signal pending? */ + if (ret < 0) { + spin_lock(&group->notification_lock); + /* Event reported to userspace and no answer yet? */ + if (event->state == FAN_EVENT_REPORTED) { + /* Event will get freed once userspace answers to it */ + event->state = FAN_EVENT_CANCELED; + spin_unlock(&group->notification_lock); + return ret; + } + /* Event not yet reported? Just remove it. */ + if (event->state == FAN_EVENT_INIT) + fsnotify_remove_queued_event(group, &event->fae.fse); + /* + * Event may be also answered in case signal delivery raced + * with wakeup. In that case we have nothing to do besides + * freeing the event and reporting error. + */ + spin_unlock(&group->notification_lock); + goto out; + } /* userspace responded, convert to something usable */ switch (event->response & ~FAN_AUDIT) { @@ -81,11 +130,11 @@ static int fanotify_get_response(struct fsnotify_group *group, if (event->response & FAN_AUDIT) audit_fanotify(event->response & ~FAN_AUDIT); - event->response = 0; - pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); - +out: + fsnotify_destroy_event(group, &event->fae.fse); + return ret; } @@ -95,11 +144,13 @@ static int fanotify_get_response(struct fsnotify_group *group, * been included within the event mask, but have not been explicitly * requested by the user, will not be present in the returned mask. */ -static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, - u32 event_mask, const void *data, - int data_type) +static u32 fanotify_group_event_mask(struct fsnotify_group *group, + struct fsnotify_iter_info *iter_info, + u32 event_mask, const void *data, + int data_type) { __u32 marks_mask = 0, marks_ignored_mask = 0; + __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS; const struct path *path = data; struct fsnotify_mark *mark; int type; @@ -107,14 +158,14 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); - /* If we don't have enough info to send an event to userspace say no */ - if (data_type != FSNOTIFY_EVENT_PATH) - return 0; - - /* Sorry, fanotify only gives a damn about files and dirs */ - if (!d_is_reg(path->dentry) && - !d_can_lookup(path->dentry)) - return 0; + if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Do we have path to open a file descriptor? */ + if (data_type != FSNOTIFY_EVENT_PATH) + return 0; + /* Path type events are only relevant for files and dirs */ + if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) + return 0; + } fsnotify_foreach_obj_type(type) { if (!fsnotify_iter_should_report_type(iter_info, type)) @@ -133,20 +184,106 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, marks_ignored_mask |= mark->ignored_mask; } - if (d_is_dir(path->dentry) && + test_mask = event_mask & marks_mask & ~marks_ignored_mask; + + /* + * dirent modification events (create/delete/move) do not carry the + * child entry name/inode information. Instead, we report FAN_ONDIR + * for mkdir/rmdir so user can differentiate them from creat/unlink. + * + * For backward compatibility and consistency, do not report FAN_ONDIR + * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR + * to user in FAN_REPORT_FID mode for all event types. + */ + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Do not report FAN_ONDIR without any event */ + if (!(test_mask & ~FAN_ONDIR)) + return 0; + } else { + user_mask &= ~FAN_ONDIR; + } + + if (event_mask & FS_ISDIR && !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return 0; - return event_mask & FANOTIFY_OUTGOING_EVENTS & marks_mask & - ~marks_ignored_mask; + return test_mask & user_mask; +} + +static int fanotify_encode_fid(struct fanotify_event *event, + struct inode *inode, gfp_t gfp, + __kernel_fsid_t *fsid) +{ + struct fanotify_fid *fid = &event->fid; + int dwords, bytes = 0; + int err, type; + + fid->ext_fh = NULL; + dwords = 0; + err = -ENOENT; + type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); + if (!dwords) + goto out_err; + + bytes = dwords << 2; + if (bytes > FANOTIFY_INLINE_FH_LEN) { + /* Treat failure to allocate fh as failure to allocate event */ + err = -ENOMEM; + fid->ext_fh = kmalloc(bytes, gfp); + if (!fid->ext_fh) + goto out_err; + } + + type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes), + &dwords, NULL); + err = -EINVAL; + if (!type || type == FILEID_INVALID || bytes != dwords << 2) + goto out_err; + + fid->fsid = *fsid; + event->fh_len = bytes; + + return type; + +out_err: + pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " + "type=%d, bytes=%d, err=%i)\n", + fsid->val[0], fsid->val[1], type, bytes, err); + kfree(fid->ext_fh); + fid->ext_fh = NULL; + event->fh_len = 0; + + return FILEID_INVALID; } -struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path) +/* + * The inode to use as identifier when reporting fid depends on the event. + * Report the modified directory inode on dirent modification events. + * Report the "victim" inode otherwise. + * For example: + * FS_ATTRIB reports the child inode even if reported on a watched parent. + * FS_CREATE reports the modified dir inode and not the created inode. + */ +static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask, + const void *data, int data_type) { - struct fanotify_event_info *event = NULL; + if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) + return to_tell; + else if (data_type == FSNOTIFY_EVENT_INODE) + return (struct inode *)data; + else if (data_type == FSNOTIFY_EVENT_PATH) + return d_inode(((struct path *)data)->dentry); + return NULL; +} + +struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, + const void *data, int data_type, + __kernel_fsid_t *fsid) +{ + struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; + struct inode *id = fanotify_fid_inode(inode, mask, data, data_type); /* * For queues with unlimited length lost events are not expected and @@ -160,28 +297,36 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, memalloc_use_memcg(group->memcg); if (fanotify_is_perm_event(mask)) { - struct fanotify_perm_event_info *pevent; + struct fanotify_perm_event *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) goto out; event = &pevent->fae; pevent->response = 0; + pevent->state = FAN_EVENT_INIT; goto init; } event = kmem_cache_alloc(fanotify_event_cachep, gfp); if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, inode, mask); + fsnotify_init_event(&event->fse, inode); + event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); else event->pid = get_pid(task_tgid(current)); - if (path) { - event->path = *path; + event->fh_len = 0; + if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Report the event without a file identifier on encode error */ + event->fh_type = fanotify_encode_fid(event, id, gfp, fsid); + } else if (data_type == FSNOTIFY_EVENT_PATH) { + event->fh_type = FILEID_ROOT; + event->path = *((struct path *)data); path_get(&event->path); } else { + event->fh_type = FILEID_INVALID; event->path.mnt = NULL; event->path.dentry = NULL; } @@ -190,6 +335,29 @@ out: return event; } +/* + * Get cached fsid of the filesystem containing the object from any connector. + * All connectors are supposed to have the same fsid, but we do not verify that + * here. + */ +static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) +{ + int type; + __kernel_fsid_t fsid = {}; + + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + + fsid = iter_info->marks[type]->connector->fsid; + if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) + continue; + return fsid; + } + + return fsid; +} + static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, @@ -197,14 +365,22 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info) { int ret = 0; - struct fanotify_event_info *event; + struct fanotify_event *event; struct fsnotify_event *fsn_event; + __kernel_fsid_t fsid = {}; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); + BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB); BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); BUILD_BUG_ON(FAN_OPEN != FS_OPEN); + BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO); + BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); + BUILD_BUG_ON(FAN_CREATE != FS_CREATE); + BUILD_BUG_ON(FAN_DELETE != FS_DELETE); + BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); + BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); @@ -213,9 +389,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); - mask = fanotify_group_event_mask(iter_info, mask, data, data_type); + mask = fanotify_group_event_mask(group, iter_info, mask, data, + data_type); if (!mask) return 0; @@ -231,7 +408,11 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(group, inode, mask, data); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + fsid = fanotify_get_fsid(iter_info); + + event = fanotify_alloc_event(group, inode, mask, data, data_type, + &fsid); ret = -ENOMEM; if (unlikely(!event)) { /* @@ -255,7 +436,6 @@ static int fanotify_handle_event(struct fsnotify_group *group, } else if (fanotify_is_perm_event(mask)) { ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), iter_info); - fsnotify_destroy_event(group, fsn_event); } finish: if (fanotify_is_perm_event(mask)) @@ -275,12 +455,15 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) static void fanotify_free_event(struct fsnotify_event *fsn_event) { - struct fanotify_event_info *event; + struct fanotify_event *event; event = FANOTIFY_E(fsn_event); - path_put(&event->path); + if (fanotify_event_has_path(event)) + path_put(&event->path); + else if (fanotify_event_has_ext_fh(event)) + kfree(event->fid.ext_fh); put_pid(event->pid); - if (fanotify_is_perm_event(fsn_event->mask)) { + if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h @@ -2,26 +2,112 @@ #include <linux/fsnotify_backend.h> #include <linux/path.h> #include <linux/slab.h> +#include <linux/exportfs.h> extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +/* Possible states of the permission event */ +enum { + FAN_EVENT_INIT, + FAN_EVENT_REPORTED, + FAN_EVENT_ANSWERED, + FAN_EVENT_CANCELED, +}; + +/* + * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation). + * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and + * fh_* fields increase the size of fanotify_event by another 4 bytes. + * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and + * fh_* fields are packed in a hole after mask. + */ +#if BITS_PER_LONG == 32 +#define FANOTIFY_INLINE_FH_LEN (3 << 2) +#else +#define FANOTIFY_INLINE_FH_LEN (4 << 2) +#endif + +struct fanotify_fid { + __kernel_fsid_t fsid; + union { + unsigned char fh[FANOTIFY_INLINE_FH_LEN]; + unsigned char *ext_fh; + }; +}; + +static inline void *fanotify_fid_fh(struct fanotify_fid *fid, + unsigned int fh_len) +{ + return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh; +} + +static inline bool fanotify_fid_equal(struct fanotify_fid *fid1, + struct fanotify_fid *fid2, + unsigned int fh_len) +{ + return fid1->fsid.val[0] == fid2->fsid.val[0] && + fid1->fsid.val[1] == fid2->fsid.val[1] && + !memcmp(fanotify_fid_fh(fid1, fh_len), + fanotify_fid_fh(fid2, fh_len), fh_len); +} + /* * Structure for normal fanotify events. It gets allocated in * fanotify_handle_event() and freed when the information is retrieved by * userspace */ -struct fanotify_event_info { +struct fanotify_event { struct fsnotify_event fse; + u32 mask; /* - * We hold ref to this path so it may be dereferenced at any point - * during this object's lifetime + * Those fields are outside fanotify_fid to pack fanotify_event nicely + * on 64bit arch and to use fh_type as an indication of whether path + * or fid are used in the union: + * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither. */ - struct path path; + u8 fh_type; + u8 fh_len; + u16 pad; + union { + /* + * We hold ref to this path so it may be dereferenced at any + * point during this object's lifetime + */ + struct path path; + /* + * With FAN_REPORT_FID, we do not hold any reference on the + * victim object. Instead we store its NFS file handle and its + * filesystem's fsid as a unique identifier. + */ + struct fanotify_fid fid; + }; struct pid *pid; }; +static inline bool fanotify_event_has_path(struct fanotify_event *event) +{ + return event->fh_type == FILEID_ROOT; +} + +static inline bool fanotify_event_has_fid(struct fanotify_event *event) +{ + return event->fh_type != FILEID_ROOT && + event->fh_type != FILEID_INVALID; +} + +static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event) +{ + return fanotify_event_has_fid(event) && + event->fh_len > FANOTIFY_INLINE_FH_LEN; +} + +static inline void *fanotify_event_fh(struct fanotify_event *event) +{ + return fanotify_fid_fh(&event->fid, event->fh_len); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -29,16 +115,17 @@ struct fanotify_event_info { * group->notification_list to group->fanotify_data.access_list to wait for * user response. */ -struct fanotify_perm_event_info { - struct fanotify_event_info fae; - int response; /* userspace answer to question */ +struct fanotify_perm_event { + struct fanotify_event fae; + unsigned short response; /* userspace answer to the event */ + unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ }; -static inline struct fanotify_perm_event_info * +static inline struct fanotify_perm_event * FANOTIFY_PE(struct fsnotify_event *fse) { - return container_of(fse, struct fanotify_perm_event_info, fae.fse); + return container_of(fse, struct fanotify_perm_event, fae.fse); } static inline bool fanotify_is_perm_event(u32 mask) @@ -47,11 +134,12 @@ static inline bool fanotify_is_perm_event(u32 mask) mask & FANOTIFY_PERM_EVENTS; } -static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) +static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { - return container_of(fse, struct fanotify_event_info, fse); + return container_of(fse, struct fanotify_event, fse); } -struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path); +struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, + const void *data, int data_type, + __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c @@ -17,6 +17,8 @@ #include <linux/compat.h> #include <linux/sched/signal.h> #include <linux/memcontrol.h> +#include <linux/statfs.h> +#include <linux/exportfs.h> #include <asm/ioctls.h> @@ -47,33 +49,55 @@ struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; +#define FANOTIFY_EVENT_ALIGN 4 + +static int fanotify_event_info_len(struct fanotify_event *event) +{ + if (!fanotify_event_has_fid(event)) + return 0; + + return roundup(sizeof(struct fanotify_event_info_fid) + + sizeof(struct file_handle) + event->fh_len, + FANOTIFY_EVENT_ALIGN); +} + /* * Get an fsnotify notification event if one exists and is small * enough to fit in "count". Return an error pointer if the count - * is not large enough. - * - * Called with the group->notification_lock held. + * is not large enough. When permission event is dequeued, its state is + * updated accordingly. */ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t count) { - assert_spin_locked(&group->notification_lock); + size_t event_size = FAN_EVENT_METADATA_LEN; + struct fsnotify_event *fsn_event = NULL; pr_debug("%s: group=%p count=%zd\n", __func__, group, count); + spin_lock(&group->notification_lock); if (fsnotify_notify_queue_is_empty(group)) - return NULL; + goto out; - if (FAN_EVENT_METADATA_LEN > count) - return ERR_PTR(-EINVAL); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + event_size += fanotify_event_info_len( + FANOTIFY_E(fsnotify_peek_first_event(group))); + } - /* held the notification_lock the whole time, so this is the - * same event we peeked above */ - return fsnotify_remove_first_event(group); + if (event_size > count) { + fsn_event = ERR_PTR(-EINVAL); + goto out; + } + fsn_event = fsnotify_remove_first_event(group); + if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask)) + FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED; +out: + spin_unlock(&group->notification_lock); + return fsn_event; } static int create_fd(struct fsnotify_group *group, - struct fanotify_event_info *event, + struct fanotify_event *event, struct file **file) { int client_fd; @@ -114,62 +138,32 @@ static int create_fd(struct fsnotify_group *group, return client_fd; } -static int fill_event_metadata(struct fsnotify_group *group, - struct fanotify_event_metadata *metadata, - struct fsnotify_event *fsn_event, - struct file **file) -{ - int ret = 0; - struct fanotify_event_info *event; - - pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, - group, metadata, fsn_event); - - *file = NULL; - event = container_of(fsn_event, struct fanotify_event_info, fse); - metadata->event_len = FAN_EVENT_METADATA_LEN; - metadata->metadata_len = FAN_EVENT_METADATA_LEN; - metadata->vers = FANOTIFY_METADATA_VERSION; - metadata->reserved = 0; - metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; - metadata->pid = pid_vnr(event->pid); - if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) - metadata->fd = FAN_NOFD; - else { - metadata->fd = create_fd(group, event, file); - if (metadata->fd < 0) - ret = metadata->fd; - } - - return ret; -} - -static struct fanotify_perm_event_info *dequeue_event( - struct fsnotify_group *group, int fd) +/* + * Finish processing of permission event by setting it to ANSWERED state and + * drop group->notification_lock. + */ +static void finish_permission_event(struct fsnotify_group *group, + struct fanotify_perm_event *event, + unsigned int response) + __releases(&group->notification_lock) { - struct fanotify_perm_event_info *event, *return_e = NULL; - - spin_lock(&group->notification_lock); - list_for_each_entry(event, &group->fanotify_data.access_list, - fae.fse.list) { - if (event->fd != fd) - continue; + bool destroy = false; - list_del_init(&event->fae.fse.list); - return_e = event; - break; - } + assert_spin_locked(&group->notification_lock); + event->response = response; + if (event->state == FAN_EVENT_CANCELED) + destroy = true; + else + event->state = FAN_EVENT_ANSWERED; spin_unlock(&group->notification_lock); - - pr_debug("%s: found return_re=%p\n", __func__, return_e); - - return return_e; + if (destroy) + fsnotify_destroy_event(group, &event->fae.fse); } static int process_access_response(struct fsnotify_group *group, struct fanotify_response *response_struct) { - struct fanotify_perm_event_info *event; + struct fanotify_perm_event *event; int fd = response_struct->fd; int response = response_struct->response; @@ -194,48 +188,115 @@ static int process_access_response(struct fsnotify_group *group, if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) return -EINVAL; - event = dequeue_event(group, fd); - if (!event) - return -ENOENT; + spin_lock(&group->notification_lock); + list_for_each_entry(event, &group->fanotify_data.access_list, + fae.fse.list) { + if (event->fd != fd) + continue; - event->response = response; - wake_up(&group->fanotify_data.access_waitq); + list_del_init(&event->fae.fse.list); + finish_permission_event(group, event, response); + wake_up(&group->fanotify_data.access_waitq); + return 0; + } + spin_unlock(&group->notification_lock); + + return -ENOENT; +} + +static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) +{ + struct fanotify_event_info_fid info = { }; + struct file_handle handle = { }; + size_t fh_len = event->fh_len; + size_t len = fanotify_event_info_len(event); + + if (!len) + return 0; + + if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) + return -EFAULT; + + /* Copy event info fid header followed by vaiable sized file handle */ + info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; + info.hdr.len = len; + info.fsid = event->fid.fsid; + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + buf += sizeof(info); + len -= sizeof(info); + handle.handle_type = event->fh_type; + handle.handle_bytes = fh_len; + if (copy_to_user(buf, &handle, sizeof(handle))) + return -EFAULT; + + buf += sizeof(handle); + len -= sizeof(handle); + if (copy_to_user(buf, fanotify_event_fh(event), fh_len)) + return -EFAULT; + + /* Pad with 0's */ + buf += fh_len; + len -= fh_len; + WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); + if (len > 0 && clear_user(buf, len)) + return -EFAULT; return 0; } static ssize_t copy_event_to_user(struct fsnotify_group *group, - struct fsnotify_event *event, + struct fsnotify_event *fsn_event, char __user *buf, size_t count) { - struct fanotify_event_metadata fanotify_event_metadata; - struct file *f; - int fd, ret; - - pr_debug("%s: group=%p event=%p\n", __func__, group, event); - - ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); - if (ret < 0) - return ret; + struct fanotify_event_metadata metadata; + struct fanotify_event *event; + struct file *f = NULL; + int ret, fd = FAN_NOFD; + + pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); + + event = container_of(fsn_event, struct fanotify_event, fse); + metadata.event_len = FAN_EVENT_METADATA_LEN; + metadata.metadata_len = FAN_EVENT_METADATA_LEN; + metadata.vers = FANOTIFY_METADATA_VERSION; + metadata.reserved = 0; + metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata.pid = pid_vnr(event->pid); + + if (fanotify_event_has_path(event)) { + fd = create_fd(group, event, &f); + if (fd < 0) + return fd; + } else if (fanotify_event_has_fid(event)) { + metadata.event_len += fanotify_event_info_len(event); + } + metadata.fd = fd; - fd = fanotify_event_metadata.fd; ret = -EFAULT; /* * Sanity check copy size in case get_one_event() and * fill_event_metadata() event_len sizes ever get out of sync. */ - if (WARN_ON_ONCE(fanotify_event_metadata.event_len > count)) + if (WARN_ON_ONCE(metadata.event_len > count)) goto out_close_fd; - if (copy_to_user(buf, &fanotify_event_metadata, - fanotify_event_metadata.event_len)) + + if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PE(event)->fd = fd; + FANOTIFY_PE(fsn_event)->fd = fd; - if (fd != FAN_NOFD) + if (fanotify_event_has_path(event)) { fd_install(fd, f); - return fanotify_event_metadata.event_len; + } else if (fanotify_event_has_fid(event)) { + ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); + if (ret < 0) + return ret; + } + + return metadata.event_len; out_close_fd: if (fd != FAN_NOFD) { @@ -276,10 +337,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, add_wait_queue(&group->notification_waitq, &wait); while (1) { - spin_lock(&group->notification_lock); kevent = get_one_event(group, count); - spin_unlock(&group->notification_lock); - if (IS_ERR(kevent)) { ret = PTR_ERR(kevent); break; @@ -316,11 +374,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(kevent->mask)) { + if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { fsnotify_destroy_event(group, kevent); } else { if (ret <= 0) { - FANOTIFY_PE(kevent)->response = FAN_DENY; + spin_lock(&group->notification_lock); + finish_permission_event(group, + FANOTIFY_PE(kevent), FAN_DENY); wake_up(&group->fanotify_data.access_waitq); } else { spin_lock(&group->notification_lock); @@ -370,7 +430,7 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fanotify_perm_event_info *event, *next; + struct fanotify_perm_event *event; struct fsnotify_event *fsn_event; /* @@ -385,13 +445,12 @@ static int fanotify_release(struct inode *ignored, struct file *file) * and simulate reply from userspace. */ spin_lock(&group->notification_lock); - list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, - fae.fse.list) { - pr_debug("%s: found group=%p event=%p\n", __func__, group, - event); - + while (!list_empty(&group->fanotify_data.access_list)) { + event = list_first_entry(&group->fanotify_data.access_list, + struct fanotify_perm_event, fae.fse.list); list_del_init(&event->fae.fse.list); - event->response = FAN_ALLOW; + finish_permission_event(group, event, FAN_ALLOW); + spin_lock(&group->notification_lock); } /* @@ -401,13 +460,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { + if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); - spin_lock(&group->notification_lock); } else { - FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + finish_permission_event(group, FANOTIFY_PE(fsn_event), + FAN_ALLOW); } + spin_lock(&group->notification_lock); } spin_unlock(&group->notification_lock); @@ -598,7 +658,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct fsnotify_mark *mark; int ret; @@ -611,7 +672,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, connp, type, 0); + ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -623,7 +684,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int type, - __u32 mask, unsigned int flags) + __u32 mask, unsigned int flags, + __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -631,7 +693,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, type); + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); if (IS_ERR(fsn_mark)) { mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); @@ -648,23 +710,23 @@ static int fanotify_add_mark(struct fsnotify_group *group, static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); } static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags) + struct super_block *sb, __u32 mask, + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags); + FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); } static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -679,7 +741,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags); + FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); } /* fanotify syscalls */ @@ -688,7 +750,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct fsnotify_group *group; int f_flags, fd; struct user_struct *user; - struct fanotify_event_info *oevent; + struct fanotify_event *oevent; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -715,6 +777,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) return -EINVAL; } + if ((flags & FAN_REPORT_FID) && + (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF) + return -EINVAL; + user = get_current_user(); if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { free_uid(user); @@ -739,7 +805,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); - oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, + FSNOTIFY_EVENT_NONE, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; @@ -801,6 +868,48 @@ out_destroy_group: return fd; } +/* Check if filesystem can encode a unique fid */ +static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) +{ + __kernel_fsid_t root_fsid; + int err; + + /* + * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). + */ + err = vfs_get_fsid(path->dentry, fsid); + if (err) + return err; + + if (!fsid->val[0] && !fsid->val[1]) + return -ENODEV; + + /* + * Make sure path is not inside a filesystem subvolume (e.g. btrfs) + * which uses a different fsid than sb root. + */ + err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); + if (err) + return err; + + if (root_fsid.val[0] != fsid->val[0] || + root_fsid.val[1] != fsid->val[1]) + return -EXDEV; + + /* + * We need to make sure that the file system supports at least + * encoding a file handle so user can use name_to_handle_at() to + * compare fid returned with event to the file handle of watched + * objects. However, name_to_handle_at() requires that the + * filesystem also supports decoding file handles. + */ + if (!path->dentry->d_sb->s_export_op || + !path->dentry->d_sb->s_export_op->fh_to_dentry) + return -EOPNOTSUPP; + + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -809,6 +918,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; + __kernel_fsid_t __fsid, *fsid = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; @@ -871,6 +981,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, group->priority == FS_PRIO_0) goto fput_and_out; + /* + * Events with data type inode do not carry enough information to report + * event->fd, so we do not allow setting a mask for inode events unless + * group supports reporting fid. + * inode events are not supported on a mount mark, because they do not + * carry enough information (i.e. path) to be filtered by mount point. + */ + if (mask & FANOTIFY_INODE_EVENTS && + (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) || + mark_type == FAN_MARK_MOUNT)) + goto fput_and_out; + if (flags & FAN_MARK_FLUSH) { ret = 0; if (mark_type == FAN_MARK_MOUNT) @@ -886,6 +1008,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + ret = fanotify_test_fid(&path, &__fsid); + if (ret) + goto path_put_and_out; + + fsid = &__fsid; + } + /* inode held in place by reference to path; group by fget on fd */ if (mark_type == FAN_MARK_INODE) inode = path.dentry->d_inode; @@ -896,24 +1026,31 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); + ret = fanotify_add_vfsmount_mark(group, mnt, mask, + flags, fsid); else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags); + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, + flags, fsid); else - ret = fanotify_add_inode_mark(group, inode, mask, flags); + ret = fanotify_add_inode_mark(group, inode, mask, + flags, fsid); break; case FAN_MARK_REMOVE: if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); + ret = fanotify_remove_vfsmount_mark(group, mnt, mask, + flags); else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags); + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, + flags); else - ret = fanotify_remove_inode_mark(group, inode, mask, flags); + ret = fanotify_remove_inode_mark(group, inode, mask, + flags); break; default: ret = -EINVAL; } +path_put_and_out: path_put(&path); fput_and_out: fdput(f); @@ -950,15 +1087,15 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); + fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = - KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC); + KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } return 0; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c @@ -328,16 +328,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { struct fsnotify_iter_info iter_info = {}; - struct super_block *sb = NULL; + struct super_block *sb = to_tell->i_sb; struct mount *mnt = NULL; - __u32 mnt_or_sb_mask = 0; + __u32 mnt_or_sb_mask = sb->s_fsnotify_mask; int ret = 0; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); - sb = mnt->mnt.mnt_sb; - mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; + mnt_or_sb_mask |= mnt->mnt_fsnotify_mask; } /* An event "on child" is not intended for a mount/sb mark */ if (mask & FS_EVENT_ON_CHILD) @@ -350,8 +349,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (!to_tell->i_fsnotify_marks && - (!mnt || (!mnt->mnt_fsnotify_marks && !sb->s_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -366,11 +365,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); + iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = + fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); - iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = - fsnotify_first_mark(&sb->s_fsnotify_marks); } /* diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h @@ -5,6 +5,7 @@ struct inotify_event_info { struct fsnotify_event fse; + u32 mask; int wd; u32 sync_cookie; int name_len; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c @@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn, { struct inotify_event_info *old, *new; - if (old_fsn->mask & FS_IN_IGNORED) - return false; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); - if ((old_fsn->mask == new_fsn->mask) && + if (old->mask & FS_IN_IGNORED) + return false; + if ((old->mask == new->mask) && (old_fsn->inode == new_fsn->inode) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) @@ -113,8 +113,18 @@ int inotify_handle_event(struct fsnotify_group *group, return -ENOMEM; } + /* + * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events + * for fanotify. inotify never reported IN_ISDIR with those events. + * It looks like an oversight, but to avoid the risk of breaking + * existing inotify programs, mask the flag out from those events. + */ + if (mask & (IN_MOVE_SELF | IN_DELETE_SELF)) + mask &= ~IN_ISDIR; + fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode, mask); + fsnotify_init_event(fsn_event, inode); + event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; event->name_len = len; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c @@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; - inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); + inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; @@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + fsnotify_init_event(group->overflow_event, NULL); + oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; diff --git a/fs/notify/mark.c b/fs/notify/mark.c @@ -82,6 +82,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> +#include <linux/ratelimit.h> #include <linux/atomic.h> @@ -481,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct inode *inode = NULL; struct fsnotify_mark_connector *conn; @@ -493,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, INIT_HLIST_HEAD(&conn->list); conn->type = type; conn->obj = connp; + /* Cache fsid of filesystem containing the object */ + if (fsid) + conn->fsid = *fsid; + else + conn->fsid.val[0] = conn->fsid.val[1] = 0; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -544,7 +551,7 @@ out: */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; @@ -553,15 +560,36 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, if (WARN_ON(!fsnotify_valid_obj_type(type))) return -EINVAL; + + /* Backend is expected to check for zero fsid (e.g. tmpfs) */ + if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1])) + return -ENODEV; + restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, type); + err = fsnotify_attach_connector_to_object(connp, type, fsid); if (err) return err; goto restart; + } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + (fsid->val[0] != conn->fsid.val[0] || + fsid->val[1] != conn->fsid.val[1])) { + /* + * Backend is expected to check for non uniform fsid + * (e.g. btrfs), but maybe we missed something? + * Only allow setting conn->fsid once to non zero fsid. + * inotify and non-fid fanotify groups do not set nor test + * conn->fsid. + */ + pr_warn_ratelimited("%s: fsid mismatch on object of type %u: " + "%x.%x != %x.%x\n", __func__, conn->type, + fsid->val[0], fsid->val[1], + conn->fsid.val[0], conn->fsid.val[1]); + err = -EXDEV; + goto out_err; } /* is mark the first mark? */ @@ -606,7 +634,7 @@ out_err: */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; @@ -627,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); if (ret) goto err; @@ -648,13 +676,13 @@ err: } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups) + unsigned int type, int allow_dups, __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid); mutex_unlock(&group->mark_mutex); return ret; } diff --git a/fs/notify/notification.c b/fs/notify/notification.c @@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event) { /* Overflow events are per-group and we don't want to free them */ - if (!event || event->mask == FS_Q_OVERFLOW) + if (!event || event == group->overflow_event) return; /* * If the event is still queued, we have a problem... Do an unreliable @@ -141,6 +141,18 @@ queue: return ret; } +void fsnotify_remove_queued_event(struct fsnotify_group *group, + struct fsnotify_event *event) +{ + assert_spin_locked(&group->notification_lock); + /* + * We need to init list head for the case of overflow event so that + * check in fsnotify_add_event() works + */ + list_del_init(&event->list); + group->q_len--; +} + /* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event @@ -155,13 +167,7 @@ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) event = list_first_entry(&group->notification_list, struct fsnotify_event, list); - /* - * We need to init list head for the case of overflow event so that - * check in fsnotify_add_event() works - */ - list_del_init(&event->list); - group->q_len--; - + fsnotify_remove_queued_event(group, event); return event; } @@ -194,23 +200,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group) } spin_unlock(&group->notification_lock); } - -/* - * fsnotify_create_event - Allocate a new event which will be sent to each - * group's handle_event function if the group was interested in this - * particular event. - * - * @inode the inode which is supposed to receive the event (sometimes a - * parent of the inode to which the event happened. - * @mask what actually happened. - * @data pointer to the object which was actually affected - * @data_type flag indication if the data is a file, path, inode, nothing... - * @name the filename, if available - */ -void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, - u32 mask) -{ - INIT_LIST_HEAD(&event->list); - event->inode = inode; - event->mask = mask; -} diff --git a/fs/statfs.c b/fs/statfs.c @@ -67,6 +67,20 @@ static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) return retval; } +int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) +{ + struct kstatfs st; + int error; + + error = statfs_by_dentry(dentry, &st); + if (error) + return error; + + *fsid = st.f_fsid; + return 0; +} +EXPORT_SYMBOL(vfs_get_fsid); + int vfs_statfs(const struct path *path, struct kstatfs *buf) { int error; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h @@ -19,7 +19,7 @@ FAN_CLASS_PRE_CONTENT) #define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ - FAN_REPORT_TID | \ + FAN_REPORT_TID | FAN_REPORT_FID | \ FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) @@ -35,10 +35,28 @@ FAN_MARK_IGNORED_SURV_MODIFY | \ FAN_MARK_FLUSH) -/* Events that user can request to be notified on */ -#define FANOTIFY_EVENTS (FAN_ACCESS | FAN_MODIFY | \ +/* + * Events that can be reported with data type FSNOTIFY_EVENT_PATH. + * Note that FAN_MODIFY can also be reported with data type + * FSNOTIFY_EVENT_INODE. + */ +#define FANOTIFY_PATH_EVENTS (FAN_ACCESS | FAN_MODIFY | \ FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC) +/* + * Directory entry modification events - reported only to directory + * where entry is modified and not to a watching parent. + */ +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) + +/* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ +#define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ + FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF) + +/* Events that user can request to be notified on */ +#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ + FANOTIFY_INODE_EVENTS) + /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ FAN_OPEN_EXEC_PERM) @@ -49,7 +67,7 @@ /* Events that may be reported to user */ #define FANOTIFY_OUTGOING_EVENTS (FANOTIFY_EVENTS | \ FANOTIFY_PERM_EVENTS | \ - FAN_Q_OVERFLOW) + FAN_Q_OVERFLOW | FAN_ONDIR) #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h @@ -17,8 +17,22 @@ #include <linux/slab.h> #include <linux/bug.h> +/* + * Notify this @dir inode about a change in the directory entry @dentry. + * + * Unlike fsnotify_parent(), the event will be reported regardless of the + * FS_EVENT_ON_CHILD mask on the parent inode. + */ +static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) +{ + return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); +} + /* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(const struct path *path, + struct dentry *dentry, __u32 mask) { if (!dentry) dentry = path->dentry; @@ -65,6 +79,9 @@ static inline int fsnotify_perm(struct file *file, int mask) fsnotify_mask = FS_ACCESS_PERM; } + if (S_ISDIR(inode->i_mode)) + fsnotify_mask |= FS_ISDIR; + return fsnotify_path(inode, path, fsnotify_mask); } @@ -73,7 +90,12 @@ static inline int fsnotify_perm(struct file *file, int mask) */ static inline void fsnotify_link_count(struct inode *inode) { - fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + __u32 mask = FS_ATTRIB; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -81,12 +103,14 @@ static inline void fsnotify_link_count(struct inode *inode) */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, const unsigned char *old_name, - int isdir, struct inode *target, struct dentry *moved) + int isdir, struct inode *target, + struct dentry *moved) { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); - __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); - __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); + __u32 old_dir_mask = FS_MOVED_FROM; + __u32 new_dir_mask = FS_MOVED_TO; + __u32 mask = FS_MOVE_SELF; const unsigned char *new_name = moved->d_name.name; if (old_dir == new_dir) @@ -95,6 +119,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; + mask |= FS_ISDIR; } fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, @@ -106,7 +131,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, fsnotify_link_count(target); if (source) - fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify(source, mask, source, FSNOTIFY_EVENT_INODE, NULL, 0); audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE); } @@ -128,15 +153,35 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) /* * fsnotify_nameremove - a filename was removed from a directory + * + * This is mostly called under parent vfs inode lock so name and + * dentry->d_parent should be stable. However there are some corner cases where + * inode lock is not held. So to be on the safe side and be reselient to future + * callers and out of tree users of d_delete(), we do not assume that d_parent + * and d_name are stable and we use dget_parent() and + * take_dentry_name_snapshot() to grab stable references. */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + struct dentry *parent; + struct name_snapshot name; __u32 mask = FS_DELETE; + /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ + if (IS_ROOT(dentry)) + return; + if (isdir) mask |= FS_ISDIR; - fsnotify_parent(NULL, dentry, mask); + parent = dget_parent(dentry); + take_dentry_name_snapshot(&name, dentry); + + fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + name.name, 0); + + release_dentry_name_snapshot(&name); + dput(parent); } /* @@ -144,7 +189,12 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) */ static inline void fsnotify_inoderemove(struct inode *inode) { - fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + __u32 mask = FS_DELETE_SELF; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); __fsnotify_inode_delete(inode); } @@ -155,7 +205,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE); } /* @@ -176,12 +226,9 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - __u32 mask = (FS_CREATE | FS_ISDIR); - struct inode *d_inode = dentry->d_inode; - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h @@ -59,27 +59,33 @@ * dnotify and inotify. */ #define FS_EVENT_ON_CHILD 0x08000000 -/* This is a list of all events that may get sent to a parernt based on fs event - * happening to inodes inside that directory */ -#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) - #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) +/* + * Directory entry modification events - reported only to directory + * where entry is modified and not to a watching parent. + * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event + * when a directory entry inside a child subdir changes. + */ +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) + #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) +/* + * This is a list of all events that may get sent to a parent based on fs event + * happening to inodes inside that directory. + */ +#define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ + FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ + FS_OPEN | FS_OPEN_EXEC) + /* Events that can be reported to backends */ -#define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ - FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) +#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FS_EVENTS_POSS_ON_CHILD | \ + FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ @@ -129,7 +135,6 @@ struct fsnotify_event { struct list_head list; /* inode may ONLY be dereferenced during handle_event(). */ struct inode *inode; /* either the inode the event happened to or its parent */ - u32 mask; /* the type of access, bitwise OR for FS_* event types */ }; /* @@ -288,6 +293,7 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; struct fsnotify_mark_connector { spinlock_t lock; unsigned int type; /* Type of object [lock] */ + __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; @@ -416,6 +422,9 @@ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); /* return AND dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); +/* Remove event queued in the notification list */ +extern void fsnotify_remove_queued_event(struct fsnotify_group *group, + struct fsnotify_event *event); /* functions used to manipulate the marks attached to inodes */ @@ -428,28 +437,35 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); +/* Get cached fsid of filesystem containing object */ +extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, + __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups); + int allow_dups, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups); + fsnotify_connp_t *connp, + unsigned int type, int allow_dups, + __kernel_fsid_t *fsid); + /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups); + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups); + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, + NULL); } + /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); @@ -479,9 +495,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -/* put here because inotify does some weird stuff when destroying watches */ -extern void fsnotify_init_event(struct fsnotify_event *event, - struct inode *to_tell, u32 mask); +static inline void fsnotify_init_event(struct fsnotify_event *event, + struct inode *inode) +{ + INIT_LIST_HEAD(&event->list); + event->inode = inode; +} #else diff --git a/include/linux/statfs.h b/include/linux/statfs.h @@ -41,4 +41,7 @@ struct kstatfs { #define ST_NODIRATIME 0x0800 /* do not update directory access times */ #define ST_RELATIME 0x1000 /* update atime relative to mtime/ctime */ +struct dentry; +extern int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid); + #endif diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h @@ -7,9 +7,16 @@ /* the following events that user-space can register for */ #define FAN_ACCESS 0x00000001 /* File was accessed */ #define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ #define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ #define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ @@ -24,6 +31,7 @@ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ /* flags used for fanotify_init() */ #define FAN_CLOEXEC 0x00000001 @@ -44,6 +52,7 @@ /* Flags to determine fanotify event format */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ @@ -106,6 +115,26 @@ struct fanotify_event_metadata { __s32 pid; }; +#define FAN_EVENT_INFO_TYPE_FID 1 + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* Unique file identifier info record */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[0]; +}; + struct fanotify_response { __s32 fd; __u32 response;