Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linux: O_TMPFILE and insert inode hash rework #16772

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/os/linux/zfs/sys/zfs_znode_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ extern "C" {
#endif

#define ZNODE_OS_FIELDS \
boolean_t z_is_tmpfile; /* file is a tmpfile */ \
inode_timespec_t z_btime; /* creation/birth time (cached) */ \
struct inode z_inode;

Expand Down
3 changes: 2 additions & 1 deletion module/os/linux/zfs/zfs_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,8 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
mutex_enter(&zp->z_lock);

if (!(flag & ZRENAMING)) {
if (zp->z_unlinked) { /* no new links to unlinked zp */
if (zp->z_unlinked && !zp->z_is_tmpfile) {
/* no new links to unlinked zp */
ASSERT(!(flag & (ZNEW | ZEXISTS)));
mutex_exit(&zp->z_lock);
return (SET_ERROR(ENOENT));
Expand Down
2 changes: 1 addition & 1 deletion module/os/linux/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1717,7 +1717,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
* Must have an existing ref, so igrab()
* cannot return NULL
*/
VERIFY3P(igrab(*ipp), !=, NULL);
zhold(ITOZ(*ipp));
}
zfs_exit(zfsvfs, FTAG);
return (0);
Expand Down
45 changes: 20 additions & 25 deletions module/os/linux/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,6 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
* delete the newly created dnode.
*/
zfs_znode_delete(zp, tx);
remove_inode_hash(ZTOI(zp));
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
goto out;
Expand Down Expand Up @@ -945,9 +944,6 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);

/* Add to unlinked set */
zp->z_unlinked = B_TRUE;
zfs_unlinked_add(zp, tx);
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
out:
Expand Down Expand Up @@ -1363,7 +1359,6 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
error = zfs_link_create(dl, zp, tx, ZNEW);
if (error != 0) {
zfs_znode_delete(zp, tx);
remove_inode_hash(ZTOI(zp));
goto out;
}

Expand Down Expand Up @@ -3168,10 +3163,12 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
error = zfs_link_create(sdl, wzp, tx, ZNEW);
if (error) {
unlock_new_inode(ZTOI(wzp));
zfs_znode_delete(wzp, tx);
remove_inode_hash(ZTOI(wzp));
goto commit_unlink_td_szp;
}
VERIFY0(insert_inode_locked(ZTOI(wzp)));
unlock_new_inode(ZTOI(wzp));
break;
}

Expand Down Expand Up @@ -3406,7 +3403,6 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
error = zfs_link_create(dl, zp, tx, ZNEW);
if (error != 0) {
zfs_znode_delete(zp, tx);
remove_inode_hash(ZTOI(zp));
} else {
if (flags & FIGNORECASE)
txtype |= TX_CI;
Expand Down Expand Up @@ -3503,11 +3499,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
uint64_t parent;
uid_t owner;
boolean_t waited = B_FALSE;
boolean_t is_tmpfile = 0;
uint64_t txg;

is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));

ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode));

if (name == NULL)
Expand Down Expand Up @@ -3610,7 +3603,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
if (is_tmpfile)
if (szp->z_is_tmpfile && szp->z_unlinked)
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

zfs_sa_upgrade_txholds(tx, szp);
Expand All @@ -3628,41 +3621,43 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
zfs_exit(zfsvfs, FTAG);
return (error);
}
/* unmark z_unlinked so zfs_link_create will not reject */
if (is_tmpfile)
szp->z_unlinked = B_FALSE;
error = zfs_link_create(dl, szp, tx, 0);

if (error == 0) {
uint64_t txtype = TX_LINK;
/*
* tmpfile is created to be in z_unlinkedobj, so remove it.
* Also, we don't log in ZIL, because all previous file
* We don't log tmpfile in ZIL, because all previous file
* operation on the tmpfile are ignored by ZIL. Instead we
* always wait for txg to sync to make sure all previous
* operation are sync safe.
*/
if (is_tmpfile) {
VERIFY(zap_remove_int(zfsvfs->z_os,
zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
} else {
if (!szp->z_is_tmpfile || !szp->z_unlinked) {
if (flags & FIGNORECASE)
txtype |= TX_CI;
zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
}
} else if (is_tmpfile) {
/* restore z_unlinked since when linking failed */
szp->z_unlinked = B_TRUE;
if (szp->z_is_tmpfile) {
mutex_enter(&szp->z_lock);
if (szp->z_unlinked) {
szp->z_unlinked = B_FALSE;
VERIFY0(zap_remove_int(zfsvfs->z_os,
zfsvfs->z_unlinkedobj,
szp->z_id, tx));
}
mutex_exit(&szp->z_lock);
}
}
txg = dmu_tx_get_txg(tx);
dmu_tx_commit(tx);

zfs_dirent_unlock(dl);

if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
if ((!szp->z_is_tmpfile || !szp->z_unlinked) &&
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);

if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
if (szp->z_is_tmpfile && szp->z_unlinked &&
(zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);

zfs_znode_update_vfs(tdzp);
Expand Down
29 changes: 10 additions & 19 deletions module/os/linux/zfs/zfs_znode_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
ASSERT3P(zp->z_acl_cached, ==, NULL);
ASSERT3P(zp->z_xattr_cached, ==, NULL);
zp->z_unlinked = B_FALSE;
zp->z_is_tmpfile = B_FALSE;
zp->z_atime_dirty = B_FALSE;
zp->z_is_ctldir = B_FALSE;
zp->z_suspended = B_FALSE;
Expand Down Expand Up @@ -590,28 +591,10 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zfs_znode_update_vfs(zp);
zfs_inode_set_ops(zfsvfs, ip);

/*
* The only way insert_inode_locked() can fail is if the ip->i_ino
* number is already hashed for this super block. This can never
* happen because the inode numbers map 1:1 with the object numbers.
*
* Exceptions include rolling back a mounted file system, either
* from the zfs rollback or zfs recv command.
*
* Active inodes are unhashed during the rollback, but since zrele
* can happen asynchronously, we can't guarantee they've been
* unhashed. This can cause hash collisions in unlinked drain
* processing so do not hash unlinked znodes.
*/
if (links > 0)
VERIFY3S(insert_inode_locked(ip), ==, 0);

mutex_enter(&zfsvfs->z_znodes_lock);
list_insert_tail(&zfsvfs->z_all_znodes, zp);
mutex_exit(&zfsvfs->z_znodes_lock);

if (links > 0)
unlock_new_inode(ip);
return (zp);

error:
Expand Down Expand Up @@ -924,6 +907,12 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
(*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
(*zpp)->z_dnodesize = dnodesize;
(*zpp)->z_projid = projid;
if (flag & IS_TMPFILE) {
(*zpp)->z_is_tmpfile = B_TRUE;
/* Add to unlinked set */
(*zpp)->z_unlinked = B_TRUE;
zfs_unlinked_add((*zpp), tx);
}

if (obj_type == DMU_OT_ZNODE ||
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
Expand Down Expand Up @@ -1106,7 +1095,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
* the VFS that this inode should not be evicted.
*/
if (igrab(ZTOI(zp)) == NULL) {
if (zp->z_unlinked)
if (zp->z_unlinked && !zp->z_is_tmpfile)
err = SET_ERROR(ENOENT);
else
err = SET_ERROR(EAGAIN);
Expand Down Expand Up @@ -1143,6 +1132,8 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
err = SET_ERROR(ENOENT);
} else {
*zpp = zp;
VERIFY0(insert_inode_locked(ZTOI(zp)));
unlock_new_inode(ZTOI(zp));
}
zfs_znode_hold_exit(zfsvfs, zh);
return (err);
Expand Down
65 changes: 33 additions & 32 deletions module/os/linux/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,10 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)

if (error) {
(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
remove_inode_hash(ZTOI(zp));
iput(ZTOI(zp));
} else {
VERIFY0(insert_inode_locked(ZTOI(zp)));
d_instantiate(dentry, ZTOI(zp));
unlock_new_inode(ZTOI(zp));
}
}

Expand Down Expand Up @@ -261,10 +261,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,

if (error) {
(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
remove_inode_hash(ZTOI(zp));
iput(ZTOI(zp));
} else {
VERIFY0(insert_inode_locked(ZTOI(zp)));
d_instantiate(dentry, ZTOI(zp));
unlock_new_inode(ZTOI(zp));
}
}

Expand Down Expand Up @@ -301,6 +301,16 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
zidmap_t *userns = kcred->user_ns;
#endif

#ifndef HAVE_TMPFILE_DENTRY
#define fname &file->f_path.dentry->d_name
#define tmpfp file
#define _fini error = finish_open_simple(file, error)
#else
#define fname &dentry->d_name
#define tmpfp dentry
#define _fini
#endif

crhold(cr);
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
/*
Expand All @@ -314,23 +324,16 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
cookie = spl_fstrans_mark();
error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL, userns);
if (error == 0) {
/* d_tmpfile will do drop_nlink, so we should set it first */
set_nlink(ip, 1);
#ifndef HAVE_TMPFILE_DENTRY
d_tmpfile(file, ip);

error = zpl_xattr_security_init(ip, dir,
&file->f_path.dentry->d_name);
#else
d_tmpfile(dentry, ip);

error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
#endif
error = zpl_xattr_security_init(ip, dir, fname);
if (error == 0)
error = zpl_init_acl(ip, dir);
#ifndef HAVE_TMPFILE_DENTRY
error = finish_open_simple(file, error);
#endif
if (error == 0) {
VERIFY0(insert_inode_locked(ip));
set_nlink(ip, 1);
d_tmpfile(tmpfp, ip);
unlock_new_inode(ip);
}
_fini;
/*
* don't need to handle error here, file is already in
* unlinked set.
Expand Down Expand Up @@ -409,10 +412,10 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)

if (error) {
(void) zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0);
remove_inode_hash(ZTOI(zp));
iput(ZTOI(zp));
} else {
VERIFY0(insert_inode_locked(ZTOI(zp)));
d_instantiate(dentry, ZTOI(zp));
unlock_new_inode(ZTOI(zp));
}
}

Expand Down Expand Up @@ -679,10 +682,10 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
if (error) {
(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
remove_inode_hash(ZTOI(zp));
iput(ZTOI(zp));
} else {
VERIFY0(insert_inode_locked(ZTOI(zp)));
d_instantiate(dentry, ZTOI(zp));
unlock_new_inode(ZTOI(zp));
}
}

Expand Down Expand Up @@ -753,30 +756,28 @@ static int
zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
cred_t *cr = CRED();
struct inode *ip = old_dentry->d_inode;
znode_t *zp = ITOZ(old_dentry->d_inode);
int error;
fstrans_cookie_t cookie;

if (is_nametoolong(dentry)) {
return (-ENAMETOOLONG);
}

if (ip->i_nlink >= ZFS_LINK_MAX)
if (ZTOI(zp)->i_nlink >= ZFS_LINK_MAX)
return (-EMLINK);

crhold(cr);
zpl_inode_set_ctime_to_ts(ip, current_time(ip));
/* Must have an existing ref, so igrab() cannot return NULL */
VERIFY3P(igrab(ip), !=, NULL);

zpl_inode_set_ctime_to_ts(ZTOI(zp), current_time(ZTOI(zp)));
zhold(zp);
cookie = spl_fstrans_mark();
error = -zfs_link(ITOZ(dir), ITOZ(ip), dname(dentry), cr, 0);
error = -zfs_link(ITOZ(dir), zp, dname(dentry), cr, 0);
if (error) {
iput(ip);
zrele(zp);
goto out;
}

d_instantiate(dentry, ip);
d_instantiate(dentry, ZTOI(zp));
out:
spl_fstrans_unmark(cookie);
crfree(cr);
Expand Down
2 changes: 1 addition & 1 deletion module/os/linux/zfs/zpl_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ zpl_evict_inode(struct inode *ip)
fstrans_cookie_t cookie;

cookie = spl_fstrans_mark();
truncate_setsize(ip, 0);
truncate_inode_pages_final(&ip->i_data);
clear_inode(ip);
zfs_inactive(ip);
spl_fstrans_unmark(cookie);
Expand Down
Loading