Skip to content

Commit

Permalink
Merge tag 'gfs2-v5.15-rc5-mmap-fault' of git://git.kernel.org/pub/scm…
Browse files Browse the repository at this point in the history
…/linux/kernel/git/gfs2/linux-gfs2

Pull gfs2 mmap + page fault deadlocks fixes from Andreas Gruenbacher:
 "Functions gfs2_file_read_iter and gfs2_file_write_iter are both
  accessing the user buffer to write to or read from while holding the
  inode glock.

  In the most basic deadlock scenario, that buffer will not be resident
  and it will be mapped to the same file. Accessing the buffer will
  trigger a page fault, and gfs2 will deadlock trying to take the same
  inode glock again while trying to handle that fault.

  Fix that and similar, more complex scenarios by disabling page faults
  while accessing user buffers. To make this work, introduce a small
  amount of new infrastructure and fix some bugs that didn't trigger so
  far, with page faults enabled"

* tag 'gfs2-v5.15-rc5-mmap-fault' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
  gfs2: Fix mmap + page fault deadlocks for direct I/O
  iov_iter: Introduce nofault flag to disable page faults
  gup: Introduce FOLL_NOFAULT flag to disable page faults
  iomap: Add done_before argument to iomap_dio_rw
  iomap: Support partial direct I/O on user copy failures
  iomap: Fix iomap_dio_rw return value for user copies
  gfs2: Fix mmap + page fault deadlocks for buffered I/O
  gfs2: Eliminate ip->i_gh
  gfs2: Move the inode glock locking to gfs2_file_buffered_write
  gfs2: Introduce flag for glock holder auto-demotion
  gfs2: Clean up function may_grant
  gfs2: Add wrapper for iomap_file_buffered_write
  iov_iter: Introduce fault_in_iov_iter_writeable
  iov_iter: Turn iov_iter_fault_in_readable into fault_in_iov_iter_readable
  gup: Turn fault_in_pages_{readable,writeable} into fault_in_{readable,writeable}
  powerpc/kvm: Fix kvm_use_magic_page
  iov_iter: Fix iov_iter_get_pages{,_alloc} page fault return value
  • Loading branch information
torvalds committed Nov 2, 2021
2 parents ab2e7f4 + b01b2d7 commit c03098d
Show file tree
Hide file tree
Showing 29 changed files with 791 additions and 283 deletions.
3 changes: 2 additions & 1 deletion arch/powerpc/kernel/kvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
on_each_cpu(kvm_map_magic_page, &features, 1);

/* Quick self-test to see if the mapping works */
if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
sizeof(u32))) {
kvm_patching_worked = false;
return;
}
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/kernel/signal_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (new_ctx == NULL)
return 0;
if (!access_ok(new_ctx, ctx_size) ||
fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
fault_in_readable((char __user *)new_ctx, ctx_size))
return -EFAULT;

/*
Expand Down Expand Up @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
#endif

if (!access_ok(ctx, sizeof(*ctx)) ||
fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
fault_in_readable((char __user *)ctx, sizeof(*ctx)))
return -EFAULT;

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/signal_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (new_ctx == NULL)
return 0;
if (!access_ok(new_ctx, ctx_size) ||
fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
fault_in_readable((char __user *)new_ctx, ctx_size))
return -EFAULT;

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/fpu/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
if (ret != X86_TRAP_PF)
return false;

if (!fault_in_pages_readable(buf, size))
if (!fault_in_readable(buf, size))
goto retry;
return false;
}
Expand Down
7 changes: 3 additions & 4 deletions drivers/gpu/drm/armada/armada_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
struct drm_armada_gem_pwrite *args = data;
struct armada_gem_object *dobj;
char __user *ptr;
int ret;
int ret = 0;

DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
args->handle, args->offset, args->size, args->ptr);
Expand All @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (!access_ok(ptr, args->size))
return -EFAULT;

ret = fault_in_pages_readable(ptr, args->size);
if (ret)
return ret;
if (fault_in_readable(ptr, args->size))
return -EFAULT;

dobj = armada_gem_object_lookup(file, args->handle);
if (dobj == NULL)
Expand Down
7 changes: 4 additions & 3 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1718,7 +1718,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
* Fault pages before locking them in prepare_pages
* to avoid recursive lock
*/
if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
ret = -EFAULT;
break;
}
Expand Down Expand Up @@ -1965,7 +1965,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
}

dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
0);
0, 0);

btrfs_inode_unlock(inode, ilock_flags);

Expand Down Expand Up @@ -3668,7 +3668,8 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
return 0;

btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 0);
ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
0, 0);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
return ret;
}
Expand Down
5 changes: 2 additions & 3 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2222,9 +2222,8 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;

while (1) {
ret = fault_in_pages_writeable(ubuf + sk_offset,
*buf_size - sk_offset);
if (ret)
ret = -EFAULT;
if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
break;

ret = btrfs_search_forward(root, &key, path, sk->min_transid);
Expand Down
2 changes: 1 addition & 1 deletion fs/erofs/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)

if (!err)
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
NULL, 0);
NULL, 0, 0);
if (err < 0)
return err;
}
Expand Down
5 changes: 3 additions & 2 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
return generic_file_read_iter(iocb, to);
}

ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0);
ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0);
inode_unlock_shared(inode);

file_accessed(iocb->ki_filp);
Expand Down Expand Up @@ -566,7 +566,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ilock_shared)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
(unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0);
(unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
0);
if (ret == -ENOTBLK)
ret = 0;

Expand Down
2 changes: 1 addition & 1 deletion fs/f2fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -4276,7 +4276,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
size_t target_size = 0;
int err;

if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);

if ((iocb->ki_flags & IOCB_NOWAIT)) {
Expand Down
2 changes: 1 addition & 1 deletion fs/fuse/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,

again:
err = -EFAULT;
if (iov_iter_fault_in_readable(ii, bytes))
if (fault_in_iov_iter_readable(ii, bytes))
break;

err = -ENOMEM;
Expand Down
60 changes: 1 addition & 59 deletions fs/gfs2/bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -961,46 +961,6 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
goto out;
}

static int gfs2_write_lock(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;

gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
error = gfs2_glock_nq(&ip->i_gh);
if (error)
goto out_uninit;
if (&ip->i_inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &m_ip->i_gh);
if (error)
goto out_unlock;
}
return 0;

out_unlock:
gfs2_glock_dq(&ip->i_gh);
out_uninit:
gfs2_holder_uninit(&ip->i_gh);
return error;
}

static void gfs2_write_unlock(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);

if (&ip->i_inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

gfs2_glock_dq_uninit(&m_ip->i_gh);
}
gfs2_glock_dq_uninit(&ip->i_gh);
}

static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
unsigned len)
{
Expand Down Expand Up @@ -1118,11 +1078,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
return ret;
}

static inline bool gfs2_iomap_need_write_lock(unsigned flags)
{
return (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT);
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap,
struct iomap *srcmap)
Expand All @@ -1135,12 +1090,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
iomap->flags |= IOMAP_F_BUFFER_HEAD;

trace_gfs2_iomap_start(ip, pos, length, flags);
if (gfs2_iomap_need_write_lock(flags)) {
ret = gfs2_write_lock(inode);
if (ret)
goto out;
}

ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
if (ret)
goto out_unlock;
Expand Down Expand Up @@ -1168,10 +1117,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
if (ret && gfs2_iomap_need_write_lock(flags))
gfs2_write_unlock(inode);
release_metapath(&mp);
out:
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
}
Expand Down Expand Up @@ -1219,15 +1165,11 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
}

if (unlikely(!written))
goto out_unlock;
return 0;

if (iomap->flags & IOMAP_F_SIZE_CHANGED)
mark_inode_dirty(inode);
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);

out_unlock:
if (gfs2_iomap_need_write_lock(flags))
gfs2_write_unlock(inode);
return 0;
}

Expand Down
Loading

0 comments on commit c03098d

Please sign in to comment.