whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

namei.c (27704B)


      1 /*
      2  * Copyright (C) 2011 Novell Inc.
      3  * Copyright (C) 2016 Red Hat, Inc.
      4  *
      5  * This program is free software; you can redistribute it and/or modify it
      6  * under the terms of the GNU General Public License version 2 as published by
      7  * the Free Software Foundation.
      8  */
      9 
     10 #include <linux/fs.h>
     11 #include <linux/cred.h>
     12 #include <linux/ctype.h>
     13 #include <linux/namei.h>
     14 #include <linux/xattr.h>
     15 #include <linux/ratelimit.h>
     16 #include <linux/mount.h>
     17 #include <linux/exportfs.h>
     18 #include "overlayfs.h"
     19 
/* State carried through the lookup of one name across the overlay layers */
struct ovl_lookup_data {
	/* Name being looked up; replaced by the redirect path once one is found */
	struct qstr name;
	/* Set when the last path element found can itself be looked up in */
	bool is_dir;
	/* Set when a whiteout, or an opaque dir as last element, was found */
	bool opaque;
	/* Set to stop looking in further layers */
	bool stop;
	/* Set by the caller for the last layer that is going to be searched */
	bool last;
	/* Redirect buffer currently backing @name (kfree'd when replaced) */
	char *redirect;
	/* Result of the metacopy xattr check on the last non-dir found */
	bool metacopy;
};
     29 
     30 static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
     31 			      size_t prelen, const char *post)
     32 {
     33 	int res;
     34 	char *buf;
     35 
     36 	buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
     37 	if (IS_ERR_OR_NULL(buf))
     38 		return PTR_ERR(buf);
     39 
     40 	if (buf[0] == '/') {
     41 		/*
     42 		 * One of the ancestor path elements in an absolute path
     43 		 * lookup in ovl_lookup_layer() could have been opaque and
     44 		 * that will stop further lookup in lower layers (d->stop=true)
     45 		 * But we have found an absolute redirect in decendant path
     46 		 * element and that should force continue lookup in lower
     47 		 * layers (reset d->stop).
     48 		 */
     49 		d->stop = false;
     50 	} else {
     51 		res = strlen(buf) + 1;
     52 		memmove(buf + prelen, buf, res);
     53 		memcpy(buf, d->name.name, prelen);
     54 	}
     55 
     56 	strcat(buf, post);
     57 	kfree(d->redirect);
     58 	d->redirect = buf;
     59 	d->name.name = d->redirect;
     60 	d->name.len = strlen(d->redirect);
     61 
     62 	return 0;
     63 }
     64 
     65 static int ovl_acceptable(void *ctx, struct dentry *dentry)
     66 {
     67 	/*
     68 	 * A non-dir origin may be disconnected, which is fine, because
     69 	 * we only need it for its unique inode number.
     70 	 */
     71 	if (!d_is_dir(dentry))
     72 		return 1;
     73 
     74 	/* Don't decode a deleted empty directory */
     75 	if (d_unhashed(dentry))
     76 		return 0;
     77 
     78 	/* Check if directory belongs to the layer we are decoding from */
     79 	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
     80 }
     81 
     82 /*
     83  * Check validity of an overlay file handle buffer.
     84  *
     85  * Return 0 for a valid file handle.
     86  * Return -ENODATA for "origin unknown".
     87  * Return <0 for an invalid file handle.
     88  */
     89 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
     90 {
     91 	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
     92 		return -EINVAL;
     93 
     94 	if (fh->magic != OVL_FH_MAGIC)
     95 		return -EINVAL;
     96 
     97 	/* Treat larger version and unknown flags as "origin unknown" */
     98 	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
     99 		return -ENODATA;
    100 
    101 	/* Treat endianness mismatch as "origin unknown" */
    102 	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
    103 	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
    104 		return -ENODATA;
    105 
    106 	return 0;
    107 }
    108 
    109 static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
    110 {
    111 	int res, err;
    112 	struct ovl_fh *fh = NULL;
    113 
    114 	res = vfs_getxattr(dentry, name, NULL, 0);
    115 	if (res < 0) {
    116 		if (res == -ENODATA || res == -EOPNOTSUPP)
    117 			return NULL;
    118 		goto fail;
    119 	}
    120 	/* Zero size value means "copied up but origin unknown" */
    121 	if (res == 0)
    122 		return NULL;
    123 
    124 	fh = kzalloc(res, GFP_KERNEL);
    125 	if (!fh)
    126 		return ERR_PTR(-ENOMEM);
    127 
    128 	res = vfs_getxattr(dentry, name, fh, res);
    129 	if (res < 0)
    130 		goto fail;
    131 
    132 	err = ovl_check_fh_len(fh, res);
    133 	if (err < 0) {
    134 		if (err == -ENODATA)
    135 			goto out;
    136 		goto invalid;
    137 	}
    138 
    139 	return fh;
    140 
    141 out:
    142 	kfree(fh);
    143 	return NULL;
    144 
    145 fail:
    146 	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
    147 	goto out;
    148 invalid:
    149 	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
    150 	goto out;
    151 }
    152 
    153 struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
    154 				  bool connected)
    155 {
    156 	struct dentry *real;
    157 	int bytes;
    158 
    159 	/*
    160 	 * Make sure that the stored uuid matches the uuid of the lower
    161 	 * layer where file handle will be decoded.
    162 	 */
    163 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
    164 		return NULL;
    165 
    166 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
    167 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
    168 				  bytes >> 2, (int)fh->type,
    169 				  connected ? ovl_acceptable : NULL, mnt);
    170 	if (IS_ERR(real)) {
    171 		/*
    172 		 * Treat stale file handle to lower file as "origin unknown".
    173 		 * upper file handle could become stale when upper file is
    174 		 * unlinked and this information is needed to handle stale
    175 		 * index entries correctly.
    176 		 */
    177 		if (real == ERR_PTR(-ESTALE) &&
    178 		    !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
    179 			real = NULL;
    180 		return real;
    181 	}
    182 
    183 	if (ovl_dentry_weird(real)) {
    184 		dput(real);
    185 		return NULL;
    186 	}
    187 
    188 	return real;
    189 }
    190 
    191 static bool ovl_is_opaquedir(struct dentry *dentry)
    192 {
    193 	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
    194 }
    195 
/*
 * Look up the single path component @name (@namelen bytes) under @base.
 *
 * @d carries the lookup state across layers and is updated according to what
 * is found (whiteout, opaque dir, metacopy, redirect).  @prelen and @post are
 * handed on to ovl_check_redirect(); an empty @post means that @name is the
 * last element of the path.
 *
 * On success return 0 and store the dentry found in *@ret, or NULL when
 * nothing usable was found.  On error return < 0 and leave *@ret untouched.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
			     const char *name, unsigned int namelen,
			     size_t prelen, const char *post,
			     struct dentry **ret)
{
	struct dentry *this;
	int err;
	bool last_element = !post[0];

	this = lookup_one_len_unlocked(name, base, namelen);
	if (IS_ERR(this)) {
		err = PTR_ERR(this);
		this = NULL;
		/* Missing or too long name is reported as "not found" */
		if (err == -ENOENT || err == -ENAMETOOLONG)
			goto out;
		goto out_err;
	}
	/* Negative dentry: nothing here */
	if (!this->d_inode)
		goto put_and_out;

	if (ovl_dentry_weird(this)) {
		/* Don't support traversing automounts and other weirdness */
		err = -EREMOTE;
		goto out_err;
	}
	/* A whiteout hides the name and ends the lookup in further layers */
	if (ovl_is_whiteout(this)) {
		d->stop = d->opaque = true;
		goto put_and_out;
	}
	/*
	 * This dentry should be a regular file if previous layer lookup
	 * found a metacopy dentry.
	 */
	if (last_element && d->metacopy && !d_is_reg(this)) {
		d->stop = true;
		goto put_and_out;
	}
	if (!d_can_lookup(this)) {
		/*
		 * Not a directory: unusable if a directory was found before
		 * or if more path elements follow.
		 */
		if (d->is_dir || !last_element) {
			d->stop = true;
			goto put_and_out;
		}
		err = ovl_check_metacopy_xattr(this);
		if (err < 0)
			goto out_err;

		/* Lookup goes on below a non-dir only if it is a metacopy */
		d->metacopy = err;
		d->stop = !d->metacopy;
		if (!d->metacopy || d->last)
			goto out;
	} else {
		if (last_element)
			d->is_dir = true;
		/* No need to check opaque/redirect on the last layer */
		if (d->last)
			goto out;

		if (ovl_is_opaquedir(this)) {
			d->stop = true;
			if (last_element)
				d->opaque = true;
			goto out;
		}
	}
	err = ovl_check_redirect(this, d, prelen, post);
	if (err)
		goto out_err;
out:
	*ret = this;
	return 0;

put_and_out:
	/* Found something, but it is not to be returned to the caller */
	dput(this);
	this = NULL;
	goto out;

out_err:
	dput(this);
	return err;
}
    275 
    276 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
    277 			    struct dentry **ret)
    278 {
    279 	/* Counting down from the end, since the prefix can change */
    280 	size_t rem = d->name.len - 1;
    281 	struct dentry *dentry = NULL;
    282 	int err;
    283 
    284 	if (d->name.name[0] != '/')
    285 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
    286 					 0, "", ret);
    287 
    288 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
    289 		const char *s = d->name.name + d->name.len - rem;
    290 		const char *next = strchrnul(s, '/');
    291 		size_t thislen = next - s;
    292 		bool end = !next[0];
    293 
    294 		/* Verify we did not go off the rails */
    295 		if (WARN_ON(s[-1] != '/'))
    296 			return -EIO;
    297 
    298 		err = ovl_lookup_single(base, d, s, thislen,
    299 					d->name.len - rem, next, &base);
    300 		dput(dentry);
    301 		if (err)
    302 			return err;
    303 		dentry = base;
    304 		if (end)
    305 			break;
    306 
    307 		rem -= thislen + 1;
    308 
    309 		if (WARN_ON(rem >= d->name.len))
    310 			return -EIO;
    311 	}
    312 	*ret = dentry;
    313 	return 0;
    314 }
    315 
    316 
    317 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
    318 			struct dentry *upperdentry, struct ovl_path **stackp)
    319 {
    320 	struct dentry *origin = NULL;
    321 	int i;
    322 
    323 	for (i = 0; i < ofs->numlower; i++) {
    324 		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
    325 					    connected);
    326 		if (origin)
    327 			break;
    328 	}
    329 
    330 	if (!origin)
    331 		return -ESTALE;
    332 	else if (IS_ERR(origin))
    333 		return PTR_ERR(origin);
    334 
    335 	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
    336 	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
    337 		goto invalid;
    338 
    339 	if (!*stackp)
    340 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
    341 	if (!*stackp) {
    342 		dput(origin);
    343 		return -ENOMEM;
    344 	}
    345 	**stackp = (struct ovl_path){
    346 		.dentry = origin,
    347 		.layer = &ofs->lower_layers[i]
    348 	};
    349 
    350 	return 0;
    351 
    352 invalid:
    353 	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
    354 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
    355 			    d_inode(origin)->i_mode & S_IFMT);
    356 	dput(origin);
    357 	return -EIO;
    358 }
    359 
    360 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
    361 			    struct ovl_path **stackp, unsigned int *ctrp)
    362 {
    363 	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
    364 	int err;
    365 
    366 	if (IS_ERR_OR_NULL(fh))
    367 		return PTR_ERR(fh);
    368 
    369 	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
    370 	kfree(fh);
    371 
    372 	if (err) {
    373 		if (err == -ESTALE)
    374 			return 0;
    375 		return err;
    376 	}
    377 
    378 	if (WARN_ON(*ctrp))
    379 		return -EIO;
    380 
    381 	*ctrp = 1;
    382 	return 0;
    383 }
    384 
    385 /*
    386  * Verify that @fh matches the file handle stored in xattr @name.
    387  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
    388  */
    389 static int ovl_verify_fh(struct dentry *dentry, const char *name,
    390 			 const struct ovl_fh *fh)
    391 {
    392 	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
    393 	int err = 0;
    394 
    395 	if (!ofh)
    396 		return -ENODATA;
    397 
    398 	if (IS_ERR(ofh))
    399 		return PTR_ERR(ofh);
    400 
    401 	if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
    402 		err = -ESTALE;
    403 
    404 	kfree(ofh);
    405 	return err;
    406 }
    407 
    408 /*
    409  * Verify that @real dentry matches the file handle stored in xattr @name.
    410  *
    411  * If @set is true and there is no stored file handle, encode @real and store
    412  * file handle in xattr @name.
    413  *
    414  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
    415  */
    416 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
    417 		      struct dentry *real, bool is_upper, bool set)
    418 {
    419 	struct inode *inode;
    420 	struct ovl_fh *fh;
    421 	int err;
    422 
    423 	fh = ovl_encode_real_fh(real, is_upper);
    424 	err = PTR_ERR(fh);
    425 	if (IS_ERR(fh)) {
    426 		fh = NULL;
    427 		goto fail;
    428 	}
    429 
    430 	err = ovl_verify_fh(dentry, name, fh);
    431 	if (set && err == -ENODATA)
    432 		err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
    433 	if (err)
    434 		goto fail;
    435 
    436 out:
    437 	kfree(fh);
    438 	return err;
    439 
    440 fail:
    441 	inode = d_inode(real);
    442 	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
    443 			    is_upper ? "upper" : "origin", real,
    444 			    inode ? inode->i_ino : 0, err);
    445 	goto out;
    446 }
    447 
    448 /* Get upper dentry from index */
    449 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
    450 {
    451 	struct ovl_fh *fh;
    452 	struct dentry *upper;
    453 
    454 	if (!d_is_dir(index))
    455 		return dget(index);
    456 
    457 	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
    458 	if (IS_ERR_OR_NULL(fh))
    459 		return ERR_CAST(fh);
    460 
    461 	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
    462 	kfree(fh);
    463 
    464 	if (IS_ERR_OR_NULL(upper))
    465 		return upper ?: ERR_PTR(-ESTALE);
    466 
    467 	if (!d_is_dir(upper)) {
    468 		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
    469 				    index, upper);
    470 		dput(upper);
    471 		return ERR_PTR(-EIO);
    472 	}
    473 
    474 	return upper;
    475 }
    476 
    477 /* Is this a leftover from create/whiteout of directory index entry? */
    478 static bool ovl_is_temp_index(struct dentry *index)
    479 {
    480 	return index->d_name.name[0] == '#';
    481 }
    482 
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 * Return 0 on match, -ESTALE on mismatch or stale origin, -ENOENT for an
 * orphan index entry, < 0 on error.
 *
 * A negative @index is accepted as is (return 0).
 */
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
	struct ovl_fh *fh = NULL;
	size_t len;
	/* Caller provided slot, so that ovl_check_origin_fh() won't allocate */
	struct ovl_path origin = { };
	struct ovl_path *stack = &origin;
	struct dentry *upper = NULL;
	int err;

	if (!d_inode(index))
		return 0;

	/* Cleanup leftover from index create/cleanup attempt */
	err = -ESTALE;
	if (ovl_is_temp_index(index))
		goto fail;

	/* The name is the hex dump of a file handle: two chars per byte */
	err = -EINVAL;
	if (index->d_name.len < sizeof(struct ovl_fh)*2)
		goto fail;

	err = -ENOMEM;
	len = index->d_name.len / 2;
	fh = kzalloc(len, GFP_KERNEL);
	if (!fh)
		goto fail;

	err = -EINVAL;
	if (hex2bin((u8 *)fh, index->d_name.name, len))
		goto fail;

	err = ovl_check_fh_len(fh, len);
	if (err)
		goto fail;

	/*
	 * Whiteout index entries are used as an indication that an exported
	 * overlay file handle should be treated as stale (i.e. after unlink
	 * of the overlay inode). These entries contain no origin xattr.
	 */
	if (ovl_is_whiteout(index))
		goto out;

	/*
	 * Verifying directory index entries are not stale is expensive, so
	 * only verify stale dir index if NFS export is enabled.
	 */
	if (d_is_dir(index) && !ofs->config.nfs_export)
		goto out;

	/*
	 * Directory index entries should have 'upper' xattr pointing to the
	 * real upper dir. Non-dir index entries are hardlinks to the upper
	 * real inode. For non-dir index, we can read the copy up origin xattr
	 * directly from the index dentry, but for dir index we first need to
	 * decode the upper directory.
	 */
	upper = ovl_index_upper(ofs, index);
	if (IS_ERR_OR_NULL(upper)) {
		err = PTR_ERR(upper);
		/*
		 * Directory index entries with no 'upper' xattr need to be
		 * removed. When dir index entry has a stale 'upper' xattr,
		 * we assume that upper dir was removed and we treat the dir
		 * index as orphan entry that needs to be whited out.
		 */
		if (err == -ESTALE)
			goto orphan;
		else if (!err)
			err = -ESTALE;
		goto fail;
	}

	/* The origin stored on upper has to be the one encoded in the name */
	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
	dput(upper);
	if (err)
		goto fail;

	/* Check if non-dir index is orphan and don't warn before cleaning it */
	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
		if (err)
			goto fail;

		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
			goto orphan;
	}

out:
	/* origin.dentry is only set if ovl_check_origin_fh() succeeded */
	dput(origin.dentry);
	kfree(fh);
	return err;

fail:
	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
			    index, d_inode(index)->i_mode & S_IFMT, err);
	goto out;

orphan:
	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
			    index, d_inode(index)->i_mode & S_IFMT,
			    d_inode(index)->i_nlink);
	err = -ENOENT;
	goto out;
}
    593 
    594 static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
    595 {
    596 	char *n, *s;
    597 
    598 	n = kcalloc(fh->len, 2, GFP_KERNEL);
    599 	if (!n)
    600 		return -ENOMEM;
    601 
    602 	s  = bin2hex(n, fh, fh->len);
    603 	*name = (struct qstr) QSTR_INIT(n, s - n);
    604 
    605 	return 0;
    606 
    607 }
    608 
    609 /*
    610  * Lookup in indexdir for the index entry of a lower real inode or a copy up
    611  * origin inode. The index entry name is the hex representation of the lower
    612  * inode file handle.
    613  *
    614  * If the index dentry is negative, then either no lower aliases have been
    615  * copied up yet, or aliases have been copied up in older kernels and are
    616  * not indexed.
    617  *
    618  * If the index dentry for a copy up origin inode is positive, but points
    619  * to an inode different than the upper inode, then either the upper inode
    620  * has been copied up and not indexed or it was indexed, but since then
    621  * index dir was cleared. Either way, that index cannot be used to identify
    622  * the overlay inode.
    623  */
    624 int ovl_get_index_name(struct dentry *origin, struct qstr *name)
    625 {
    626 	struct ovl_fh *fh;
    627 	int err;
    628 
    629 	fh = ovl_encode_real_fh(origin, false);
    630 	if (IS_ERR(fh))
    631 		return PTR_ERR(fh);
    632 
    633 	err = ovl_get_index_name_fh(fh, name);
    634 
    635 	kfree(fh);
    636 	return err;
    637 }
    638 
    639 /* Lookup index by file handle for NFS export */
    640 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
    641 {
    642 	struct dentry *index;
    643 	struct qstr name;
    644 	int err;
    645 
    646 	err = ovl_get_index_name_fh(fh, &name);
    647 	if (err)
    648 		return ERR_PTR(err);
    649 
    650 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
    651 	kfree(name.name);
    652 	if (IS_ERR(index)) {
    653 		if (PTR_ERR(index) == -ENOENT)
    654 			index = NULL;
    655 		return index;
    656 	}
    657 
    658 	if (d_is_negative(index))
    659 		err = 0;
    660 	else if (ovl_is_whiteout(index))
    661 		err = -ESTALE;
    662 	else if (ovl_dentry_weird(index))
    663 		err = -EIO;
    664 	else
    665 		return index;
    666 
    667 	dput(index);
    668 	return ERR_PTR(err);
    669 }
    670 
/*
 * Look up the index entry of @origin in ofs->indexdir.
 *
 * @upper is the upper dentry the index is expected to belong to, or NULL.
 * With @verify, a whiteout index is treated as a bad index and a dir index is
 * checked against @upper with ovl_verify_upper().
 *
 * Return the index dentry, NULL if there is no index entry or if it belongs
 * to an inode other than that of @upper, ERR_PTR(-ESTALE) for a whiteout
 * index with !@verify, ERR_PTR(-EIO) for a bad index, or another ERR_PTR()
 * value on error.
 */
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
				struct dentry *origin, bool verify)
{
	struct dentry *index;
	struct inode *inode;
	struct qstr name;
	bool is_dir = d_is_dir(origin);
	int err;

	err = ovl_get_index_name(origin, &name);
	if (err)
		return ERR_PTR(err);

	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
		/* No index entry is not an error */
		if (err == -ENOENT) {
			index = NULL;
			goto out;
		}
		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
				    d_inode(origin)->i_ino, name.len, name.name,
				    err);
		/* index still holds the ERR_PTR() that is returned at out */
		goto out;
	}

	inode = d_inode(index);
	if (d_is_negative(index)) {
		goto out_dput;
	} else if (ovl_is_whiteout(index) && !verify) {
		/*
		 * When index lookup is called with !verify for decoding an
		 * overlay file handle, a whiteout index implies that decode
		 * should treat file handle as stale and no need to print a
		 * warning about it.
		 */
		dput(index);
		index = ERR_PTR(-ESTALE);
		goto out;
	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
		/*
		 * Index should always be of the same file type as origin
		 * except for the case of a whiteout index. A whiteout
		 * index should only exist if all lower aliases have been
		 * unlinked, which means that finding a lower origin on lookup
		 * whose index is a whiteout should be treated as an error.
		 */
		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
				    index, d_inode(index)->i_mode & S_IFMT,
				    d_inode(origin)->i_mode & S_IFMT);
		goto fail;
	} else if (is_dir && verify) {
		if (!upper) {
			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
					    origin, index);
			goto fail;
		}

		/* Verify that dir index 'upper' xattr points to upper dir */
		err = ovl_verify_upper(index, upper, false);
		if (err) {
			if (err == -ESTALE) {
				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
						    upper, origin, index);
			}
			goto fail;
		}
	} else if (upper && d_inode(upper) != inode) {
		/* Index is of another inode: it cannot be used for @upper */
		goto out_dput;
	}
out:
	/* All paths past ovl_get_index_name() free the name here */
	kfree(name.name);
	return index;

out_dput:
	dput(index);
	index = NULL;
	goto out;

fail:
	dput(index);
	index = ERR_PTR(-EIO);
	goto out;
}
    757 
    758 /*
    759  * Returns next layer in stack starting from top.
    760  * Returns -1 if this is the last layer.
    761  */
    762 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
    763 {
    764 	struct ovl_entry *oe = dentry->d_fsdata;
    765 
    766 	BUG_ON(idx < 0);
    767 	if (idx == 0) {
    768 		ovl_path_upper(dentry, path);
    769 		if (path->dentry)
    770 			return oe->numlower ? 1 : -1;
    771 		idx++;
    772 	}
    773 	BUG_ON(idx > oe->numlower);
    774 	path->dentry = oe->lowerstack[idx - 1].dentry;
    775 	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
    776 
    777 	return (idx < oe->numlower) ? idx + 1 : -1;
    778 }
    779 
    780 /* Fix missing 'origin' xattr */
    781 static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
    782 			  struct dentry *upper)
    783 {
    784 	int err;
    785 
    786 	if (ovl_check_origin_xattr(upper))
    787 		return 0;
    788 
    789 	err = ovl_want_write(dentry);
    790 	if (err)
    791 		return err;
    792 
    793 	err = ovl_set_origin(dentry, lower, upper);
    794 	if (!err)
    795 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
    796 
    797 	ovl_drop_write(dentry);
    798 	return err;
    799 }
    800 
    801 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
    802 			  unsigned int flags)
    803 {
    804 	struct ovl_entry *oe;
    805 	const struct cred *old_cred;
    806 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
    807 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
    808 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
    809 	struct ovl_path *stack = NULL, *origin_path = NULL;
    810 	struct dentry *upperdir, *upperdentry = NULL;
    811 	struct dentry *origin = NULL;
    812 	struct dentry *index = NULL;
    813 	unsigned int ctr = 0;
    814 	struct inode *inode = NULL;
    815 	bool upperopaque = false;
    816 	char *upperredirect = NULL;
    817 	struct dentry *this;
    818 	unsigned int i;
    819 	int err;
    820 	bool metacopy = false;
    821 	struct ovl_lookup_data d = {
    822 		.name = dentry->d_name,
    823 		.is_dir = false,
    824 		.opaque = false,
    825 		.stop = false,
    826 		.last = ofs->config.redirect_follow ? false : !poe->numlower,
    827 		.redirect = NULL,
    828 		.metacopy = false,
    829 	};
    830 
    831 	if (dentry->d_name.len > ofs->namelen)
    832 		return ERR_PTR(-ENAMETOOLONG);
    833 
    834 	old_cred = ovl_override_creds(dentry->d_sb);
    835 	upperdir = ovl_dentry_upper(dentry->d_parent);
    836 	if (upperdir) {
    837 		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
    838 		if (err)
    839 			goto out;
    840 
    841 		if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
    842 			dput(upperdentry);
    843 			err = -EREMOTE;
    844 			goto out;
    845 		}
    846 		if (upperdentry && !d.is_dir) {
    847 			unsigned int origin_ctr = 0;
    848 
    849 			/*
    850 			 * Lookup copy up origin by decoding origin file handle.
    851 			 * We may get a disconnected dentry, which is fine,
    852 			 * because we only need to hold the origin inode in
    853 			 * cache and use its inode number.  We may even get a
    854 			 * connected dentry, that is not under any of the lower
    855 			 * layers root.  That is also fine for using it's inode
    856 			 * number - it's the same as if we held a reference
    857 			 * to a dentry in lower layer that was moved under us.
    858 			 */
    859 			err = ovl_check_origin(ofs, upperdentry, &origin_path,
    860 					       &origin_ctr);
    861 			if (err)
    862 				goto out_put_upper;
    863 
    864 			if (d.metacopy)
    865 				metacopy = true;
    866 		}
    867 
    868 		if (d.redirect) {
    869 			err = -ENOMEM;
    870 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
    871 			if (!upperredirect)
    872 				goto out_put_upper;
    873 			if (d.redirect[0] == '/')
    874 				poe = roe;
    875 		}
    876 		upperopaque = d.opaque;
    877 	}
    878 
    879 	if (!d.stop && poe->numlower) {
    880 		err = -ENOMEM;
    881 		stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
    882 				GFP_KERNEL);
    883 		if (!stack)
    884 			goto out_put_upper;
    885 	}
    886 
    887 	for (i = 0; !d.stop && i < poe->numlower; i++) {
    888 		struct ovl_path lower = poe->lowerstack[i];
    889 
    890 		if (!ofs->config.redirect_follow)
    891 			d.last = i == poe->numlower - 1;
    892 		else
    893 			d.last = lower.layer->idx == roe->numlower;
    894 
    895 		err = ovl_lookup_layer(lower.dentry, &d, &this);
    896 		if (err)
    897 			goto out_put;
    898 
    899 		if (!this)
    900 			continue;
    901 
    902 		/*
    903 		 * If no origin fh is stored in upper of a merge dir, store fh
    904 		 * of lower dir and set upper parent "impure".
    905 		 */
    906 		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
    907 			err = ovl_fix_origin(dentry, this, upperdentry);
    908 			if (err) {
    909 				dput(this);
    910 				goto out_put;
    911 			}
    912 		}
    913 
    914 		/*
    915 		 * When "verify_lower" feature is enabled, do not merge with a
    916 		 * lower dir that does not match a stored origin xattr. In any
    917 		 * case, only verified origin is used for index lookup.
    918 		 *
    919 		 * For non-dir dentry, if index=on, then ensure origin
    920 		 * matches the dentry found using path based lookup,
    921 		 * otherwise error out.
    922 		 */
    923 		if (upperdentry && !ctr &&
    924 		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
    925 		     (!d.is_dir && ofs->config.index && origin_path))) {
    926 			err = ovl_verify_origin(upperdentry, this, false);
    927 			if (err) {
    928 				dput(this);
    929 				if (d.is_dir)
    930 					break;
    931 				goto out_put;
    932 			}
    933 			origin = this;
    934 		}
    935 
    936 		if (d.metacopy)
    937 			metacopy = true;
    938 		/*
    939 		 * Do not store intermediate metacopy dentries in chain,
    940 		 * except top most lower metacopy dentry
    941 		 */
    942 		if (d.metacopy && ctr) {
    943 			dput(this);
    944 			continue;
    945 		}
    946 
    947 		stack[ctr].dentry = this;
    948 		stack[ctr].layer = lower.layer;
    949 		ctr++;
    950 
    951 		/*
    952 		 * Following redirects can have security consequences: it's like
    953 		 * a symlink into the lower layer without the permission checks.
    954 		 * This is only a problem if the upper layer is untrusted (e.g
    955 		 * comes from an USB drive).  This can allow a non-readable file
    956 		 * or directory to become readable.
    957 		 *
    958 		 * Only following redirects when redirects are enabled disables
    959 		 * this attack vector when not necessary.
    960 		 */
    961 		err = -EPERM;
    962 		if (d.redirect && !ofs->config.redirect_follow) {
    963 			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
    964 					    dentry);
    965 			goto out_put;
    966 		}
    967 
    968 		if (d.stop)
    969 			break;
    970 
    971 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
    972 			poe = roe;
    973 			/* Find the current layer on the root dentry */
    974 			i = lower.layer->idx - 1;
    975 		}
    976 	}
    977 
    978 	if (metacopy) {
    979 		/*
    980 		 * Found a metacopy dentry but did not find corresponding
    981 		 * data dentry
    982 		 */
    983 		if (d.metacopy) {
    984 			err = -EIO;
    985 			goto out_put;
    986 		}
    987 
    988 		err = -EPERM;
    989 		if (!ofs->config.metacopy) {
    990 			pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
    991 					    dentry);
    992 			goto out_put;
    993 		}
    994 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
    995 		if (WARN_ON(stack != NULL)) {
    996 			err = -EIO;
    997 			goto out_put;
    998 		}
    999 		stack = origin_path;
   1000 		ctr = 1;
   1001 		origin_path = NULL;
   1002 	}
   1003 
   1004 	/*
   1005 	 * Lookup index by lower inode and verify it matches upper inode.
   1006 	 * We only trust dir index if we verified that lower dir matches
   1007 	 * origin, otherwise dir index entries may be inconsistent and we
   1008 	 * ignore them.
   1009 	 *
   1010 	 * For non-dir upper metacopy dentry, we already set "origin" if we
   1011 	 * verified that lower matched upper origin. If upper origin was
   1012 	 * not present (because lower layer did not support fh encode/decode),
   1013 	 * or indexing is not enabled, do not set "origin" and skip looking up
   1014 	 * index. This case should be handled in same way as a non-dir upper
   1015 	 * without ORIGIN is handled.
   1016 	 *
   1017 	 * Always lookup index of non-dir non-metacopy and non-upper.
   1018 	 */
   1019 	if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
   1020 		origin = stack[0].dentry;
   1021 
   1022 	if (origin && ovl_indexdir(dentry->d_sb) &&
   1023 	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
   1024 		index = ovl_lookup_index(ofs, upperdentry, origin, true);
   1025 		if (IS_ERR(index)) {
   1026 			err = PTR_ERR(index);
   1027 			index = NULL;
   1028 			goto out_put;
   1029 		}
   1030 	}
   1031 
   1032 	oe = ovl_alloc_entry(ctr);
   1033 	err = -ENOMEM;
   1034 	if (!oe)
   1035 		goto out_put;
   1036 
   1037 	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
   1038 	dentry->d_fsdata = oe;
   1039 
   1040 	if (upperopaque)
   1041 		ovl_dentry_set_opaque(dentry);
   1042 
   1043 	if (upperdentry)
   1044 		ovl_dentry_set_upper_alias(dentry);
   1045 	else if (index) {
   1046 		upperdentry = dget(index);
   1047 		upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
   1048 		if (IS_ERR(upperredirect)) {
   1049 			err = PTR_ERR(upperredirect);
   1050 			upperredirect = NULL;
   1051 			goto out_free_oe;
   1052 		}
   1053 	}
   1054 
   1055 	if (upperdentry || ctr) {
   1056 		struct ovl_inode_params oip = {
   1057 			.upperdentry = upperdentry,
   1058 			.lowerpath = stack,
   1059 			.index = index,
   1060 			.numlower = ctr,
   1061 			.redirect = upperredirect,
   1062 			.lowerdata = (ctr > 1 && !d.is_dir) ?
   1063 				      stack[ctr - 1].dentry : NULL,
   1064 		};
   1065 
   1066 		inode = ovl_get_inode(dentry->d_sb, &oip);
   1067 		err = PTR_ERR(inode);
   1068 		if (IS_ERR(inode))
   1069 			goto out_free_oe;
   1070 	}
   1071 
   1072 	revert_creds(old_cred);
   1073 	if (origin_path) {
   1074 		dput(origin_path->dentry);
   1075 		kfree(origin_path);
   1076 	}
   1077 	dput(index);
   1078 	kfree(stack);
   1079 	kfree(d.redirect);
   1080 	return d_splice_alias(inode, dentry);
   1081 
   1082 out_free_oe:
   1083 	dentry->d_fsdata = NULL;
   1084 	kfree(oe);
   1085 out_put:
   1086 	dput(index);
   1087 	for (i = 0; i < ctr; i++)
   1088 		dput(stack[i].dentry);
   1089 	kfree(stack);
   1090 out_put_upper:
   1091 	if (origin_path) {
   1092 		dput(origin_path->dentry);
   1093 		kfree(origin_path);
   1094 	}
   1095 	dput(upperdentry);
   1096 	kfree(upperredirect);
   1097 out:
   1098 	kfree(d.redirect);
   1099 	revert_creds(old_cred);
   1100 	return ERR_PTR(err);
   1101 }
   1102 
   1103 bool ovl_lower_positive(struct dentry *dentry)
   1104 {
   1105 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
   1106 	const struct qstr *name = &dentry->d_name;
   1107 	const struct cred *old_cred;
   1108 	unsigned int i;
   1109 	bool positive = false;
   1110 	bool done = false;
   1111 
   1112 	/*
   1113 	 * If dentry is negative, then lower is positive iff this is a
   1114 	 * whiteout.
   1115 	 */
   1116 	if (!dentry->d_inode)
   1117 		return ovl_dentry_is_opaque(dentry);
   1118 
   1119 	/* Negative upper -> positive lower */
   1120 	if (!ovl_dentry_upper(dentry))
   1121 		return true;
   1122 
   1123 	old_cred = ovl_override_creds(dentry->d_sb);
   1124 	/* Positive upper -> have to look up lower to see whether it exists */
   1125 	for (i = 0; !done && !positive && i < poe->numlower; i++) {
   1126 		struct dentry *this;
   1127 		struct dentry *lowerdir = poe->lowerstack[i].dentry;
   1128 
   1129 		this = lookup_one_len_unlocked(name->name, lowerdir,
   1130 					       name->len);
   1131 		if (IS_ERR(this)) {
   1132 			switch (PTR_ERR(this)) {
   1133 			case -ENOENT:
   1134 			case -ENAMETOOLONG:
   1135 				break;
   1136 
   1137 			default:
   1138 				/*
   1139 				 * Assume something is there, we just couldn't
   1140 				 * access it.
   1141 				 */
   1142 				positive = true;
   1143 				break;
   1144 			}
   1145 		} else {
   1146 			if (this->d_inode) {
   1147 				positive = !ovl_is_whiteout(this);
   1148 				done = true;
   1149 			}
   1150 			dput(this);
   1151 		}
   1152 	}
   1153 	revert_creds(old_cred);
   1154 
   1155 	return positive;
   1156 }