You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
334 lines
7.8 KiB
334 lines
7.8 KiB
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>

#include "internal.h"
|
|
|
/** |
|
* backing_file_open - open a backing file for kernel internal use |
|
* @user_path: path that the user reuqested to open |
|
* @flags: open flags |
|
* @real_path: path of the backing file |
|
* @cred: credentials for open |
|
* |
|
* Open a backing file for a stackable filesystem (e.g., overlayfs). |
|
* @user_path may be on the stackable filesystem and @real_path on the |
|
* underlying filesystem. In this case, we want to be able to return the |
|
* @user_path of the stackable filesystem. This is done by embedding the |
|
* returned file into a container structure that also stores the stacked |
|
* file's path, which can be retrieved using backing_file_user_path(). |
|
*/ |
|
struct file *backing_file_open(const struct path *user_path, int flags, |
|
const struct path *real_path, |
|
const struct cred *cred) |
|
{ |
|
struct file *f; |
|
int error; |
|
|
|
f = alloc_empty_backing_file(flags, cred); |
|
if (IS_ERR(f)) |
|
return f; |
|
|
|
path_get(user_path); |
|
*backing_file_user_path(f) = *user_path; |
|
error = vfs_open(real_path, f); |
|
if (error) { |
|
fput(f); |
|
f = ERR_PTR(error); |
|
} |
|
|
|
return f; |
|
} |
|
EXPORT_SYMBOL_GPL(backing_file_open); |
|
|
|
struct backing_aio { |
|
struct kiocb iocb; |
|
refcount_t ref; |
|
struct kiocb *orig_iocb; |
|
/* used for aio completion */ |
|
void (*end_write)(struct file *); |
|
struct work_struct work; |
|
long res; |
|
}; |
|
|
|
static struct kmem_cache *backing_aio_cachep; |
|
|
|
#define BACKING_IOCB_MASK \ |
|
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) |
|
|
|
static rwf_t iocb_to_rw_flags(int flags) |
|
{ |
|
return (__force rwf_t)(flags & BACKING_IOCB_MASK); |
|
} |
|
|
|
static void backing_aio_put(struct backing_aio *aio) |
|
{ |
|
if (refcount_dec_and_test(&aio->ref)) { |
|
fput(aio->iocb.ki_filp); |
|
kmem_cache_free(backing_aio_cachep, aio); |
|
} |
|
} |
|
|
|
static void backing_aio_cleanup(struct backing_aio *aio, long res) |
|
{ |
|
struct kiocb *iocb = &aio->iocb; |
|
struct kiocb *orig_iocb = aio->orig_iocb; |
|
|
|
if (aio->end_write) |
|
aio->end_write(orig_iocb->ki_filp); |
|
|
|
orig_iocb->ki_pos = iocb->ki_pos; |
|
backing_aio_put(aio); |
|
} |
|
|
|
static void backing_aio_rw_complete(struct kiocb *iocb, long res) |
|
{ |
|
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); |
|
struct kiocb *orig_iocb = aio->orig_iocb; |
|
|
|
if (iocb->ki_flags & IOCB_WRITE) |
|
kiocb_end_write(iocb); |
|
|
|
backing_aio_cleanup(aio, res); |
|
orig_iocb->ki_complete(orig_iocb, res); |
|
} |
|
|
|
static void backing_aio_complete_work(struct work_struct *work) |
|
{ |
|
struct backing_aio *aio = container_of(work, struct backing_aio, work); |
|
|
|
backing_aio_rw_complete(&aio->iocb, aio->res); |
|
} |
|
|
|
static void backing_aio_queue_completion(struct kiocb *iocb, long res) |
|
{ |
|
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); |
|
|
|
/* |
|
* Punt to a work queue to serialize updates of mtime/size. |
|
*/ |
|
aio->res = res; |
|
INIT_WORK(&aio->work, backing_aio_complete_work); |
|
queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq, |
|
&aio->work); |
|
} |
|
|
|
static int backing_aio_init_wq(struct kiocb *iocb) |
|
{ |
|
struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; |
|
|
|
if (sb->s_dio_done_wq) |
|
return 0; |
|
|
|
return sb_init_dio_done_wq(sb); |
|
} |
|
|
|
|
|
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, |
|
struct kiocb *iocb, int flags, |
|
struct backing_file_ctx *ctx) |
|
{ |
|
struct backing_aio *aio = NULL; |
|
const struct cred *old_cred; |
|
ssize_t ret; |
|
|
|
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) |
|
return -EIO; |
|
|
|
if (!iov_iter_count(iter)) |
|
return 0; |
|
|
|
if (iocb->ki_flags & IOCB_DIRECT && |
|
!(file->f_mode & FMODE_CAN_ODIRECT)) |
|
return -EINVAL; |
|
|
|
old_cred = override_creds(ctx->cred); |
|
if (is_sync_kiocb(iocb)) { |
|
rwf_t rwf = iocb_to_rw_flags(flags); |
|
|
|
ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf); |
|
} else { |
|
ret = -ENOMEM; |
|
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); |
|
if (!aio) |
|
goto out; |
|
|
|
aio->orig_iocb = iocb; |
|
kiocb_clone(&aio->iocb, iocb, get_file(file)); |
|
aio->iocb.ki_complete = backing_aio_rw_complete; |
|
refcount_set(&aio->ref, 2); |
|
ret = vfs_iocb_iter_read(file, &aio->iocb, iter); |
|
backing_aio_put(aio); |
|
if (ret != -EIOCBQUEUED) |
|
backing_aio_cleanup(aio, ret); |
|
} |
|
out: |
|
revert_creds(old_cred); |
|
|
|
if (ctx->accessed) |
|
ctx->accessed(ctx->user_file); |
|
|
|
return ret; |
|
} |
|
EXPORT_SYMBOL_GPL(backing_file_read_iter); |
|
|
|
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, |
|
struct kiocb *iocb, int flags, |
|
struct backing_file_ctx *ctx) |
|
{ |
|
const struct cred *old_cred; |
|
ssize_t ret; |
|
|
|
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) |
|
return -EIO; |
|
|
|
if (!iov_iter_count(iter)) |
|
return 0; |
|
|
|
ret = file_remove_privs(ctx->user_file); |
|
if (ret) |
|
return ret; |
|
|
|
if (iocb->ki_flags & IOCB_DIRECT && |
|
!(file->f_mode & FMODE_CAN_ODIRECT)) |
|
return -EINVAL; |
|
|
|
/* |
|
* Stacked filesystems don't support deferred completions, don't copy |
|
* this property in case it is set by the issuer. |
|
*/ |
|
flags &= ~IOCB_DIO_CALLER_COMP; |
|
|
|
old_cred = override_creds(ctx->cred); |
|
if (is_sync_kiocb(iocb)) { |
|
rwf_t rwf = iocb_to_rw_flags(flags); |
|
|
|
ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf); |
|
if (ctx->end_write) |
|
ctx->end_write(ctx->user_file); |
|
} else { |
|
struct backing_aio *aio; |
|
|
|
ret = backing_aio_init_wq(iocb); |
|
if (ret) |
|
goto out; |
|
|
|
ret = -ENOMEM; |
|
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); |
|
if (!aio) |
|
goto out; |
|
|
|
aio->orig_iocb = iocb; |
|
aio->end_write = ctx->end_write; |
|
kiocb_clone(&aio->iocb, iocb, get_file(file)); |
|
aio->iocb.ki_flags = flags; |
|
aio->iocb.ki_complete = backing_aio_queue_completion; |
|
refcount_set(&aio->ref, 2); |
|
ret = vfs_iocb_iter_write(file, &aio->iocb, iter); |
|
backing_aio_put(aio); |
|
if (ret != -EIOCBQUEUED) |
|
backing_aio_cleanup(aio, ret); |
|
} |
|
out: |
|
revert_creds(old_cred); |
|
|
|
return ret; |
|
} |
|
EXPORT_SYMBOL_GPL(backing_file_write_iter); |
|
|
|
/*
 * Splice data from a backing file into a pipe, with @ctx->cred
 * overridden.  Notifies @ctx->accessed on the user file afterwards.
 * @in must have FMODE_BACKING.
 */
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags,
				 struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
		return -EIO;

	old_cred = override_creds(ctx->cred);
	ret = vfs_splice_read(in, ppos, pipe, len, flags);
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);
|
|
|
/*
 * Splice data from a pipe into a backing file.  Strips privileges from
 * the user file first, performs the write with @ctx->cred overridden
 * under write protection, and notifies @ctx->end_write afterwards.
 * @out must have FMODE_BACKING.
 */
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
				  struct file *out, loff_t *ppos, size_t len,
				  unsigned int flags,
				  struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
		return -EIO;

	ret = file_remove_privs(ctx->user_file);
	if (ret)
		return ret;

	old_cred = override_creds(ctx->cred);
	file_start_write(out);
	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
	file_end_write(out);
	revert_creds(old_cred);

	if (ctx->end_write)
		ctx->end_write(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);
|
|
|
int backing_file_mmap(struct file *file, struct vm_area_struct *vma, |
|
struct backing_file_ctx *ctx) |
|
{ |
|
const struct cred *old_cred; |
|
int ret; |
|
|
|
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) || |
|
WARN_ON_ONCE(ctx->user_file != vma->vm_file)) |
|
return -EIO; |
|
|
|
if (!file->f_op->mmap) |
|
return -ENODEV; |
|
|
|
vma_set_file(vma, file); |
|
|
|
old_cred = override_creds(ctx->cred); |
|
ret = call_mmap(vma->vm_file, vma); |
|
revert_creds(old_cred); |
|
|
|
if (ctx->accessed) |
|
ctx->accessed(ctx->user_file); |
|
|
|
return ret; |
|
} |
|
EXPORT_SYMBOL_GPL(backing_file_mmap); |
|
|
|
/* Create the backing_aio slab cache at fs initcall time. */
static int __init backing_aio_init(void)
{
	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
	if (!backing_aio_cachep)
		return -ENOMEM;

	return 0;
}
fs_initcall(backing_aio_init);
|
|
|