From 902d554349ee38ea44b0af068651c7320080b22c Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Sat, 16 Nov 2024 21:39:56 +0100 Subject: [PATCH] Linux: O_TMPFILE and insert inode hash rework This change: - reworks inode hash insert so that its in topmost layer and with inode unlock in the same order as other Linux FS implementations do it - reworks O_TMPFILE creation to use new linux specific zp->z_is_tmpfile, this fixes a potential unchecked error return value of zpl_xattr_security_init - changes igrabs to zhold where it can - exchanges truncate_setsize for truncate_inode_pages_final in zpl_evict_inode to follow vfs.rst's "the method has to use truncate_inode_pages_final()" Signed-off-by: Pavel Snajdr --- include/os/linux/zfs/sys/zfs_znode_impl.h | 1 + module/os/linux/zfs/zfs_dir.c | 3 +- module/os/linux/zfs/zfs_vfsops.c | 2 +- module/os/linux/zfs/zfs_vnops_os.c | 45 +++++++--------- module/os/linux/zfs/zfs_znode_os.c | 29 ++++------ module/os/linux/zfs/zpl_inode.c | 65 ++++++++++++----------- module/os/linux/zfs/zpl_super.c | 2 +- 7 files changed, 68 insertions(+), 79 deletions(-) diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h index cc8e5150eaf1..0a9f92152e8e 100644 --- a/include/os/linux/zfs/sys/zfs_znode_impl.h +++ b/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -48,6 +48,7 @@ extern "C" { #endif #define ZNODE_OS_FIELDS \ + boolean_t z_is_tmpfile; /* file is a tmpfile */ \ inode_timespec_t z_btime; /* creation/birth time (cached) */ \ struct inode z_inode; diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 564e89b37d11..011cc3217e48 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -816,7 +816,8 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) mutex_enter(&zp->z_lock); if (!(flag & ZRENAMING)) { - if (zp->z_unlinked) { /* no new links to unlinked zp */ + if (zp->z_unlinked && !zp->z_is_tmpfile) { + /* no new links to unlinked zp */ ASSERT(!(flag & (ZNEW | ZEXISTS))); mutex_exit(&zp->z_lock); return (SET_ERROR(ENOENT)); diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 3c53a8a315c3..f61459f3f483 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1717,7 +1717,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) * Must have an existing ref, so igrab() * cannot return NULL */ - VERIFY3P(igrab(*ipp), !=, NULL); + zhold(ITOZ(*ipp)); } zfs_exit(zfsvfs, FTAG); return (0); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index dd9fd760b9c2..651b61af2d5d 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -759,7 +759,6 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, * delete the newly created dnode. */ zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); goto out; @@ -945,9 +944,6 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); - /* Add to unlinked set */ - zp->z_unlinked = B_TRUE; - zfs_unlinked_add(zp, tx); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); out: @@ -1363,7 +1359,6 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp, error = zfs_link_create(dl, zp, tx, ZNEW); if (error != 0) { zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); goto out; } @@ -3168,10 +3163,12 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids); error = zfs_link_create(sdl, wzp, tx, ZNEW); if (error) { + unlock_new_inode(ZTOI(wzp)); zfs_znode_delete(wzp, tx); - remove_inode_hash(ZTOI(wzp)); goto commit_unlink_td_szp; } + VERIFY0(insert_inode_locked(ZTOI(wzp))); + unlock_new_inode(ZTOI(wzp)); break; } @@ -3406,7 +3403,6 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link, error = zfs_link_create(dl, zp, tx, ZNEW); if (error != 0) { zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); } else { if (flags & FIGNORECASE) txtype |= TX_CI; @@ -3503,11 +3499,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, uint64_t parent; uid_t owner; boolean_t waited = B_FALSE; - boolean_t is_tmpfile = 0; uint64_t txg; - is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE)); - ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode)); if (name == NULL) @@ -3610,7 +3603,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name); - if (is_tmpfile) + if (szp->z_is_tmpfile && szp->z_unlinked) dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); zfs_sa_upgrade_txholds(tx, szp); @@ -3628,41 +3621,43 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, zfs_exit(zfsvfs, FTAG); return (error); } - /* unmark z_unlinked so zfs_link_create will not reject */ - if (is_tmpfile) - szp->z_unlinked = B_FALSE; error = zfs_link_create(dl, szp, tx, 0); if (error == 0) { uint64_t txtype = TX_LINK; /* - * tmpfile is created to be in z_unlinkedobj, so remove it. - * Also, we don't log in ZIL, because all previous file + * We don't log tmpfile in ZIL, because all previous file * operation on the tmpfile are ignored by ZIL. Instead we * always wait for txg to sync to make sure all previous * operation are sync safe. */ - if (is_tmpfile) { - VERIFY(zap_remove_int(zfsvfs->z_os, - zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0); - } else { + if (!szp->z_is_tmpfile || !szp->z_unlinked) { if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_link(zilog, tx, txtype, tdzp, szp, name); } - } else if (is_tmpfile) { - /* restore z_unlinked since when linking failed */ - szp->z_unlinked = B_TRUE; + if (szp->z_is_tmpfile) { + mutex_enter(&szp->z_lock); + if (szp->z_unlinked) { + szp->z_unlinked = B_FALSE; + VERIFY0(zap_remove_int(zfsvfs->z_os, + zfsvfs->z_unlinkedobj, + szp->z_id, tx)); + } + mutex_exit(&szp->z_lock); + } } txg = dmu_tx_get_txg(tx); dmu_tx_commit(tx); zfs_dirent_unlock(dl); - if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if ((!szp->z_is_tmpfile || !szp->z_unlinked) && + zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) + if (szp->z_is_tmpfile && szp->z_unlinked && + (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg); zfs_znode_update_vfs(tdzp); diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index bbaca2f58394..2158ce1ecaba 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -525,6 +525,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_unlinked = B_FALSE; + zp->z_is_tmpfile = B_FALSE; zp->z_atime_dirty = B_FALSE; zp->z_is_ctldir = B_FALSE; zp->z_suspended = B_FALSE; @@ -590,28 +591,10 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zfs_znode_update_vfs(zp); zfs_inode_set_ops(zfsvfs, ip); - /* - * The only way insert_inode_locked() can fail is if the ip->i_ino - * number is already hashed for this super block. This can never - * happen because the inode numbers map 1:1 with the object numbers. - * - * Exceptions include rolling back a mounted file system, either - * from the zfs rollback or zfs recv command. - * - * Active inodes are unhashed during the rollback, but since zrele - * can happen asynchronously, we can't guarantee they've been - * unhashed. This can cause hash collisions in unlinked drain - * processing so do not hash unlinked znodes. - */ - if (links > 0) - VERIFY3S(insert_inode_locked(ip), ==, 0); - mutex_enter(&zfsvfs->z_znodes_lock); list_insert_tail(&zfsvfs->z_all_znodes, zp); mutex_exit(&zfsvfs->z_znodes_lock); - if (links > 0) - unlock_new_inode(ip); return (zp); error: @@ -924,6 +907,12 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode; (*zpp)->z_dnodesize = dnodesize; (*zpp)->z_projid = projid; + if (flag & IS_TMPFILE) { + (*zpp)->z_is_tmpfile = B_TRUE; + /* Add to unlinked set */ + (*zpp)->z_unlinked = B_TRUE; + zfs_unlinked_add((*zpp), tx); + } if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { @@ -1106,7 +1095,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * the VFS that this inode should not be evicted. */ if (igrab(ZTOI(zp)) == NULL) { - if (zp->z_unlinked) + if (zp->z_unlinked && !zp->z_is_tmpfile) err = SET_ERROR(ENOENT); else err = SET_ERROR(EAGAIN); @@ -1143,6 +1132,8 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) err = SET_ERROR(ENOENT); } else { *zpp = zp; + VERIFY0(insert_inode_locked(ZTOI(zp))); + unlock_new_inode(ZTOI(zp)); } zfs_znode_hold_exit(zfsvfs, zh); return (err); diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index c4b5087ca5e7..fb985f2bc5c6 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -199,10 +199,10 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag) if (error) { (void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0); - remove_inode_hash(ZTOI(zp)); - iput(ZTOI(zp)); } else { + VERIFY0(insert_inode_locked(ZTOI(zp))); d_instantiate(dentry, ZTOI(zp)); + unlock_new_inode(ZTOI(zp)); } } @@ -261,10 +261,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, if (error) { (void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0); - remove_inode_hash(ZTOI(zp)); - iput(ZTOI(zp)); } else { + VERIFY0(insert_inode_locked(ZTOI(zp))); d_instantiate(dentry, ZTOI(zp)); + unlock_new_inode(ZTOI(zp)); } } @@ -301,6 +301,16 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) zidmap_t *userns = kcred->user_ns; #endif +#ifndef HAVE_TMPFILE_DENTRY +#define fname &file->f_path.dentry->d_name +#define tmpfp file +#define _fini error = finish_open_simple(file, error) +#else +#define fname &dentry->d_name +#define tmpfp dentry +#define _fini +#endif + crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); /* @@ -314,23 +324,16 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) cookie = spl_fstrans_mark(); error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL, userns); if (error == 0) { - /* d_tmpfile will do drop_nlink, so we should set it first */ - set_nlink(ip, 1); -#ifndef HAVE_TMPFILE_DENTRY - d_tmpfile(file, ip); - - error = zpl_xattr_security_init(ip, dir, - &file->f_path.dentry->d_name); -#else - d_tmpfile(dentry, ip); - - error = zpl_xattr_security_init(ip, dir, &dentry->d_name); -#endif + error = zpl_xattr_security_init(ip, dir, fname); if (error == 0) error = zpl_init_acl(ip, dir); -#ifndef HAVE_TMPFILE_DENTRY - error = finish_open_simple(file, error); -#endif + if (error == 0) { + VERIFY0(insert_inode_locked(ip)); + set_nlink(ip, 1); + d_tmpfile(tmpfp, ip); + unlock_new_inode(ip); + } + _fini; /* * don't need to handle error here, file is already in * unlinked set. @@ -409,10 +412,10 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (error) { (void) zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0); - remove_inode_hash(ZTOI(zp)); - iput(ZTOI(zp)); } else { + VERIFY0(insert_inode_locked(ZTOI(zp))); d_instantiate(dentry, ZTOI(zp)); + unlock_new_inode(ZTOI(zp)); } } @@ -679,10 +682,10 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name); if (error) { (void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0); - remove_inode_hash(ZTOI(zp)); - iput(ZTOI(zp)); } else { + VERIFY0(insert_inode_locked(ZTOI(zp))); d_instantiate(dentry, ZTOI(zp)); + unlock_new_inode(ZTOI(zp)); } } @@ -753,7 +756,7 @@ static int zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { cred_t *cr = CRED(); - struct inode *ip = old_dentry->d_inode; + znode_t *zp = ITOZ(old_dentry->d_inode); int error; fstrans_cookie_t cookie; @@ -761,22 +764,20 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return (-ENAMETOOLONG); } - if (ip->i_nlink >= ZFS_LINK_MAX) + if (ZTOI(zp)->i_nlink >= ZFS_LINK_MAX) return (-EMLINK); crhold(cr); - zpl_inode_set_ctime_to_ts(ip, current_time(ip)); - /* Must have an existing ref, so igrab() cannot return NULL */ - VERIFY3P(igrab(ip), !=, NULL); - + zpl_inode_set_ctime_to_ts(ZTOI(zp), current_time(ZTOI(zp))); + zhold(zp); cookie = spl_fstrans_mark(); - error = -zfs_link(ITOZ(dir), ITOZ(ip), dname(dentry), cr, 0); + error = -zfs_link(ITOZ(dir), zp, dname(dentry), cr, 0); if (error) { - iput(ip); + zrele(zp); goto out; } - d_instantiate(dentry, ip); + d_instantiate(dentry, ZTOI(zp)); out: spl_fstrans_unmark(cookie); crfree(cr); diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 287f5f36f9dd..07753b2b785c 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -86,7 +86,7 @@ zpl_evict_inode(struct inode *ip) fstrans_cookie_t cookie; cookie = spl_fstrans_mark(); - truncate_setsize(ip, 0); + truncate_inode_pages_final(&ip->i_data); clear_inode(ip); zfs_inactive(ip); spl_fstrans_unmark(cookie);