diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 140bd5730d99..f902a7fb4630 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -121,7 +121,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned val; + unsigned int val = 0; ssize_t ret; ret = fuse_conn_limit_write(file, buf, count, ppos, &val, @@ -163,7 +163,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned val; + unsigned int val = 0; struct fuse_conn *fc; ssize_t ret; diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index dfcb98a654d8..174333633471 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -527,7 +527,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { - fuse_dev_free(fud); + fuse_dev_put(fud); return rc; } file->private_data = fud; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0b0241f47170..5dda7080f4a9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -570,6 +570,11 @@ static void request_wait_answer(struct fuse_req *req) if (!err) return; + if (req->args->abort_on_kill) { + fuse_abort_conn(fc); + return; + } + if (test_bit(FR_URING, &req->flags)) removed = fuse_uring_remove_pending_req(req); else @@ -676,7 +681,8 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, fuse_force_creds(req); __set_bit(FR_WAITING, &req->flags); - __set_bit(FR_FORCE, &req->flags); + if (!args->abort_on_kill) + __set_bit(FR_FORCE, &req->flags); } else { WARN_ON(args->nocreds); req = fuse_get_req(idmap, fm, false); @@ -1011,6 +1017,9 @@ static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop folio_clear_uptodate(newfolio); folio_clear_mappedtodisk(newfolio); + if (folio_test_large(newfolio)) + goto out_fallback_unlock; + if (fuse_check_folio(newfolio) != 0) goto out_fallback_unlock; @@ -1539,32 +1548,24 @@ out_end: static int 
fuse_dev_open(struct inode *inode, struct file *file) { - /* - * The fuse device's file's private_data is used to hold - * the fuse_conn(ection) when it is mounted, and is used to - * keep track of whether the file has been mounted already. - */ - file->private_data = NULL; + struct fuse_dev *fud = fuse_dev_alloc(); + + if (!fud) + return -ENOMEM; + + file->private_data = fud; return 0; } struct fuse_dev *fuse_get_dev(struct file *file) { - struct fuse_dev *fud = __fuse_get_dev(file); + struct fuse_dev *fud = fuse_file_to_fud(file); int err; - if (likely(fud)) - return fud; - - err = wait_event_interruptible(fuse_dev_waitq, - READ_ONCE(file->private_data) != FUSE_DEV_SYNC_INIT); + err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_fc_get(fud) != NULL); if (err) return ERR_PTR(err); - fud = __fuse_get_dev(file); - if (!fud) - return ERR_PTR(-EPERM); - return fud; } @@ -1764,10 +1765,9 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, struct address_space *mapping; u64 nodeid; int err; - pgoff_t index; - unsigned int offset; unsigned int num; loff_t file_size; + loff_t pos; loff_t end; if (size < sizeof(outarg)) @@ -1780,7 +1780,12 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, if (size - sizeof(outarg) != outarg.size) return -EINVAL; + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + nodeid = outarg.nodeid; + pos = outarg.offset; + num = min(outarg.size, MAX_LFS_FILESIZE - pos); down_read(&fc->killsb); @@ -1790,33 +1795,29 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, goto out_up_killsb; mapping = inode->i_mapping; - index = outarg.offset >> PAGE_SHIFT; - offset = outarg.offset & ~PAGE_MASK; file_size = i_size_read(inode); - end = outarg.offset + outarg.size; + end = pos + num; if (end > file_size) { file_size = end; - fuse_write_update_attr(inode, file_size, outarg.size); + fuse_write_update_attr(inode, file_size, num); } - num = outarg.size; while (num) { struct folio 
*folio; unsigned int folio_offset; unsigned int nr_bytes; - unsigned int nr_pages; + pgoff_t index = pos >> PAGE_SHIFT; folio = filemap_grab_folio(mapping, index); err = PTR_ERR(folio); if (IS_ERR(folio)) goto out_iput; - folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset; + folio_offset = offset_in_folio(folio, pos); nr_bytes = min(num, folio_size(folio) - folio_offset); - nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT; err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); - if (!folio_test_uptodate(folio) && !err && offset == 0 && + if (!folio_test_uptodate(folio) && !err && folio_offset == 0 && (nr_bytes == folio_size(folio) || file_size == end)) { folio_zero_segment(folio, nr_bytes, folio_size(folio)); folio_mark_uptodate(folio); @@ -1827,9 +1828,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, if (err) goto out_iput; + pos += nr_bytes; num -= nr_bytes; - offset = 0; - index += nr_pages; } err = 0; @@ -1861,7 +1861,6 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, { int err; struct address_space *mapping = inode->i_mapping; - pgoff_t index; loff_t file_size; unsigned int num; unsigned int offset; @@ -1872,17 +1871,18 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, size_t args_size = sizeof(*ra); struct fuse_args_pages *ap; struct fuse_args *args; + loff_t pos = outarg->offset; - offset = outarg->offset & ~PAGE_MASK; + offset = offset_in_page(pos); file_size = i_size_read(inode); num = min(outarg->size, fc->max_write); - if (outarg->offset > file_size) + if (pos > file_size) num = 0; - else if (outarg->offset + num > file_size) - num = file_size - outarg->offset; + else if (num > file_size - pos) + num = file_size - pos; - num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE); num_pages = min(num_pages, fc->max_pages); num = min(num, num_pages << PAGE_SHIFT); @@ -1903,31 +1903,27 @@ static int 
fuse_retrieve(struct fuse_mount *fm, struct inode *inode, args->in_pages = true; args->end = fuse_retrieve_end; - index = outarg->offset >> PAGE_SHIFT; - while (num && ap->num_folios < num_pages) { struct folio *folio; unsigned int folio_offset; unsigned int nr_bytes; - unsigned int nr_pages; + pgoff_t index = pos >> PAGE_SHIFT; folio = filemap_get_folio(mapping, index); if (IS_ERR(folio)) break; - folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset; + folio_offset = offset_in_folio(folio, pos); nr_bytes = min(folio_size(folio) - folio_offset, num); - nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT; ap->folios[ap->num_folios] = folio; ap->descs[ap->num_folios].offset = folio_offset; ap->descs[ap->num_folios].length = nr_bytes; ap->num_folios++; - offset = 0; + pos += nr_bytes; num -= nr_bytes; total_len += nr_bytes; - index += nr_pages; } ra->inarg.offset = outarg->offset; ra->inarg.size = total_len; @@ -1961,6 +1957,9 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, fuse_copy_finish(cs); + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + down_read(&fc->killsb); err = -ENOENT; nodeid = outarg.nodeid; @@ -2091,6 +2090,13 @@ static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { + /* + * Only allow notifications while the connection is in an + * initialized and connected state + */ + if (!fc->initialized || !fc->connected) + return -EINVAL; + /* Don't try to move folios (yet) */ cs->move_folios = false; @@ -2533,13 +2539,15 @@ void fuse_wait_aborted(struct fuse_conn *fc) int fuse_dev_release(struct inode *inode, struct file *file) { - struct fuse_dev *fud = __fuse_get_dev(file); + struct fuse_dev *fud = fuse_file_to_fud(file); + /* Pairs with cmpxchg() in fuse_dev_install() */ + struct fuse_conn *fc = xchg(&fud->fc, FUSE_DEV_FC_DISCONNECTED); - if (fud) { - struct 
fuse_conn *fc = fud->fc; + if (fc) { struct fuse_pqueue *fpq = &fud->pq; LIST_HEAD(to_end); unsigned int i; + bool last; spin_lock(&fpq->lock); WARN_ON(!list_empty(&fpq->io)); @@ -2549,13 +2557,19 @@ int fuse_dev_release(struct inode *inode, struct file *file) fuse_dev_end_requests(&to_end); + spin_lock(&fc->lock); + list_del(&fud->entry); /* Are we the last open device? */ - if (atomic_dec_and_test(&fc->dev_count)) { + last = list_empty(&fc->devices); + spin_unlock(&fc->lock); + + if (last) { WARN_ON(fc->iq.fasync != NULL); fuse_abort_conn(fc); } - fuse_dev_free(fud); + fuse_conn_put(fc); } + fuse_dev_put(fud); return 0; } EXPORT_SYMBOL_GPL(fuse_dev_release); @@ -2571,28 +2585,10 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) return fasync_helper(fd, file, on, &fud->fc->iq.fasync); } -static int fuse_device_clone(struct fuse_conn *fc, struct file *new) -{ - struct fuse_dev *fud; - - if (__fuse_get_dev(new)) - return -EINVAL; - - fud = fuse_dev_alloc_install(fc); - if (!fud) - return -ENOMEM; - - new->private_data = fud; - atomic_inc(&fc->dev_count); - - return 0; -} - static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) { - int res; int oldfd; - struct fuse_dev *fud = NULL; + struct fuse_dev *fud, *new_fud; if (get_user(oldfd, argp)) return -EFAULT; @@ -2605,17 +2601,20 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) * Check against file->f_op because CUSE * uses the same ioctl handler. 
*/ - if (fd_file(f)->f_op == file->f_op) - fud = __fuse_get_dev(fd_file(f)); + if (fd_file(f)->f_op != file->f_op) + return -EINVAL; - res = -EINVAL; - if (fud) { - mutex_lock(&fuse_mutex); - res = fuse_device_clone(fud->fc, file); - mutex_unlock(&fuse_mutex); - } + fud = fuse_get_dev(fd_file(f)); + if (IS_ERR(fud)) + return PTR_ERR(fud); - return res; + new_fud = fuse_file_to_fud(file); + if (fuse_dev_fc_get(new_fud)) + return -EINVAL; + + fuse_dev_install(new_fud, fud->fc); + + return 0; } static long fuse_dev_ioctl_backing_open(struct file *file, @@ -2656,10 +2655,11 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) static long fuse_dev_ioctl_sync_init(struct file *file) { int err = -EINVAL; + struct fuse_dev *fud = fuse_file_to_fud(file); mutex_lock(&fuse_mutex); - if (!__fuse_get_dev(file)) { - WRITE_ONCE(file->private_data, FUSE_DEV_SYNC_INIT); + if (!fuse_dev_fc_get(fud)) { + fud->sync_init = true; err = 0; } mutex_unlock(&fuse_mutex); diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 3a38b61aac26..7b9822e8837b 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -397,6 +397,20 @@ static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue) FRRS_AVAILABLE); } +static void fuse_uring_teardown_all_queues(struct fuse_ring *ring) +{ + int qid; + + for (qid = 0; qid < ring->nr_queues; qid++) { + struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); + + if (!queue) + continue; + + fuse_uring_teardown_entries(queue); + } +} + /* * Log state debug info */ @@ -431,19 +445,10 @@ static void fuse_uring_log_ent_state(struct fuse_ring *ring) static void fuse_uring_async_stop_queues(struct work_struct *work) { - int qid; struct fuse_ring *ring = container_of(work, struct fuse_ring, async_teardown_work.work); - /* XXX code dup */ - for (qid = 0; qid < ring->nr_queues; qid++) { - struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); - - if (!queue) - continue; - - 
fuse_uring_teardown_entries(queue); - } + fuse_uring_teardown_all_queues(ring); /* * Some ring entries might be in the middle of IO operations, @@ -469,16 +474,7 @@ static void fuse_uring_async_stop_queues(struct work_struct *work) */ void fuse_uring_stop_queues(struct fuse_ring *ring) { - int qid; - - for (qid = 0; qid < ring->nr_queues; qid++) { - struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); - - if (!queue) - continue; - - fuse_uring_teardown_entries(queue); - } + fuse_uring_teardown_all_queues(ring); if (atomic_read(&ring->queue_refs) > 0) { ring->teardown_time = jiffies; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 7ac6b232ef12..b658b6baf72f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -354,8 +354,8 @@ static void fuse_invalidate_entry(struct dentry *entry) fuse_invalidate_entry_cache(entry); } -static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, - u64 nodeid, const struct qstr *name, +static void fuse_lookup_init(struct fuse_args *args, u64 nodeid, + const struct qstr *name, struct fuse_entry_out *outarg) { memset(outarg, 0, sizeof(struct fuse_entry_out)); @@ -421,8 +421,7 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, attr_version = fuse_get_attr_version(fm->fc); - fuse_lookup_init(fm->fc, &args, get_node_id(dir), - name, &outarg); + fuse_lookup_init(&args, get_node_id(dir), name, &outarg); ret = fuse_simple_request(fm, &args); /* Zero nodeid is same as -ENOENT */ if (!ret && !outarg.nodeid) @@ -481,6 +480,11 @@ static int fuse_dentry_init(struct dentry *dentry) fd->dentry = dentry; RB_CLEAR_NODE(&fd->node); dentry->d_fsdata = fd; + /* + * Initialising d_time (epoch) to '0' ensures the dentry is invalid + * if compared to fc->epoch, which is initialized to '1'. 
+ */ + dentry->d_time = 0; return 0; } @@ -570,7 +574,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name attr_version = fuse_get_attr_version(fm->fc); evict_ctr = fuse_get_evict_ctr(fm->fc); - fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); + fuse_lookup_init(&args, nodeid, name, outarg); err = fuse_simple_request(fm, &args); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (err || !outarg->nodeid) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f6240f24b814..c59452d60b8d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -117,6 +117,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) fuse_simple_request(ff->fm, args); fuse_release_end(ff->fm, args, 0); } else { + /* + * DAX inodes may need to issue a number of synchronous + * requests for clearing the mappings. + */ + if (ra && ra->inode && FUSE_IS_DAX(ra->inode)) + args->may_block = true; args->end = fuse_release_end; if (fuse_simple_background(ff->fm, args, GFP_KERNEL | __GFP_NOFAIL)) @@ -1243,7 +1249,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, { struct fuse_args_pages *ap = &ia->ap; struct fuse_conn *fc = get_fuse_conn(mapping->host); - unsigned offset = pos & (PAGE_SIZE - 1); size_t count = 0; unsigned int num; int err = 0; @@ -1270,7 +1275,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); - folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset; + folio_offset = offset_in_folio(folio, pos); bytes = min(folio_size(folio) - folio_offset, num); tmp = copy_folio_from_iter_atomic(folio, folio_offset, bytes, ii); @@ -1300,9 +1305,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, count += tmp; pos += tmp; num -= tmp; - offset += tmp; - if (offset == folio_size(folio)) - offset = 0; /* If we copied full folio, mark it uptodate */ if (tmp == folio_size(folio)) @@ -1314,7 +1316,9 @@ static ssize_t 
fuse_fill_write_pages(struct fuse_io_args *ia, ia->write.folio_locked = true; break; } - if (!fc->big_writes || offset != 0) + if (!fc->big_writes) + break; + if (folio_offset + tmp != folio_size(folio)) break; } @@ -2173,7 +2177,7 @@ static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos, WARN_ON(!ap->num_folios); /* Reached max pages */ - if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages) + if (DIV_ROUND_UP(bytes, PAGE_SIZE) > fc->max_pages) return true; if (bytes > max_bytes) diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 134bf44aff0d..910f883cd090 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -39,18 +39,36 @@ struct fuse_copy_state { } ring; }; -#define FUSE_DEV_SYNC_INIT ((struct fuse_dev *) 1) -#define FUSE_DEV_PTR_MASK (~1UL) +/* fud->fc gets assigned to this value when /dev/fuse is closed */ +#define FUSE_DEV_FC_DISCONNECTED ((struct fuse_conn *) 1) + +/* + * Lockless access is OK, because fud->fc is set once during mount and is valid + * until the file is released. + * + * fud->fc is set to FUSE_DEV_FC_DISCONNECTED only after the containing file is + * released, so result is safe to dereference in most cases. Exceptions are: + * fuse_dev_put() and fuse_fill_super_common(). + */ +static inline struct fuse_conn *fuse_dev_fc_get(struct fuse_dev *fud) +{ + /* Pairs with cmpxchg() in fuse_dev_install() */ + return smp_load_acquire(&fud->fc); +} + +static inline struct fuse_dev *fuse_file_to_fud(struct file *file) +{ + return file->private_data; +} static inline struct fuse_dev *__fuse_get_dev(struct file *file) { - /* - * Lockless access is OK, because file->private data is set - * once during mount and is valid until the file is released. 
- */ - struct fuse_dev *fud = READ_ONCE(file->private_data); + struct fuse_dev *fud = fuse_file_to_fud(file); - return (typeof(fud)) ((unsigned long) fud & FUSE_DEV_PTR_MASK); + if (!fuse_dev_fc_get(fud)) + return NULL; + + return fud; } struct fuse_dev *fuse_get_dev(struct file *file); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7f16049387d1..17423d4e3cfa 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -345,6 +345,7 @@ struct fuse_args { bool is_ext:1; bool is_pinned:1; bool invalidate_vmap:1; + bool abort_on_kill:1; struct fuse_in_arg in_args[4]; struct fuse_arg out_args[2]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); @@ -576,6 +577,12 @@ struct fuse_pqueue { * Fuse device instance */ struct fuse_dev { + /** Reference count of this object */ + refcount_t ref; + + /** Issue FUSE_INIT synchronously */ + bool sync_init; + /** Fuse connection for this device */ struct fuse_conn *fc; @@ -599,13 +606,11 @@ static inline bool fuse_is_inode_dax_mode(enum fuse_dax_mode mode) } struct fuse_fs_context { - int fd; - struct file *file; + struct fuse_dev *fud; unsigned int rootmode; kuid_t user_id; kgid_t group_id; bool is_bdev:1; - bool fd_present:1; bool rootmode_present:1; bool user_id_present:1; bool group_id_present:1; @@ -622,9 +627,6 @@ struct fuse_fs_context { /* DAX device, may be NULL */ struct dax_device *dax_dev; - - /* fuse_dev pointer to fill in, should contain NULL on entry */ - void **fudptr; }; struct fuse_sync_bucket { @@ -648,9 +650,6 @@ struct fuse_conn { /** Refcount */ refcount_t count; - /** Number of fuse_dev's */ - atomic_t dev_count; - /** Current epoch for up-to-date dentries */ atomic_t epoch; @@ -1343,7 +1342,7 @@ void fuse_conn_put(struct fuse_conn *fc); struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); struct fuse_dev *fuse_dev_alloc(void); void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); -void fuse_dev_free(struct fuse_dev *fud); +void fuse_dev_put(struct fuse_dev 
*fud); int fuse_send_init(struct fuse_mount *fm); /** diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c795abe47a4f..deddfffb037f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -470,6 +470,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, struct inode *inode; struct fuse_inode *fi; struct fuse_conn *fc = get_fuse_conn_super(sb); + bool is_new_inode = false; /* * Auto mount points get their node id from the submount root, which is @@ -505,13 +506,13 @@ retry: if (!inode) return NULL; - if ((inode_state_read_once(inode) & I_NEW)) { + is_new_inode = inode_state_read_once(inode) & I_NEW; + if (is_new_inode) { inode->i_flags |= S_NOATIME; if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; fuse_init_inode(inode, attr, fc); - unlock_new_inode(inode); } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); @@ -528,6 +529,8 @@ retry: done: fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version, evict_ctr); + if (is_new_inode) + unlock_new_inode(inode); return inode; } @@ -788,7 +791,7 @@ enum { static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_string ("source", OPT_SOURCE), - fsparam_u32 ("fd", OPT_FD), + fsparam_fd ("fd", OPT_FD), fsparam_u32oct ("rootmode", OPT_ROOTMODE), fsparam_uid ("user_id", OPT_USER_ID), fsparam_gid ("group_id", OPT_GROUP_ID), @@ -800,6 +803,25 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { {} }; +static int fuse_opt_fd(struct fs_context *fsc, struct file *file) +{ + struct fuse_fs_context *ctx = fsc->fs_private; + + if (file->f_op != &fuse_dev_operations) + return invalfc(fsc, "fd is not a fuse device"); + /* + * Require mount to happen from the same user namespace which + * opened /dev/fuse to prevent potential attacks. 
+ */ + if (file->f_cred->user_ns != fsc->user_ns) + return invalfc(fsc, "wrong user namespace for fuse device"); + + ctx->fud = file->private_data; + refcount_inc(&ctx->fud->ref); + + return 0; +} + static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) { struct fs_parse_result result; @@ -839,9 +861,15 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) return 0; case OPT_FD: - ctx->fd = result.uint_32; - ctx->fd_present = true; - break; + if (param->type == fs_value_is_file) { + return fuse_opt_fd(fsc, param->file); + } else { + struct file *file __free(fput) = fget(result.uint_32); + if (!file) + return -EBADF; + + return fuse_opt_fd(fsc, file); + } case OPT_ROOTMODE: if (!fuse_valid_type(result.uint_32)) @@ -904,6 +932,8 @@ static void fuse_free_fsc(struct fs_context *fsc) struct fuse_fs_context *ctx = fsc->fs_private; if (ctx) { + if (ctx->fud) + fuse_dev_put(ctx->fud); kfree(ctx->subtype); kfree(ctx); } @@ -975,7 +1005,6 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, spin_lock_init(&fc->bg_lock); init_rwsem(&fc->killsb); refcount_set(&fc->count, 1); - atomic_set(&fc->dev_count, 1); atomic_set(&fc->epoch, 1); INIT_WORK(&fc->epoch_work, fuse_epoch_work); init_waitqueue_head(&fc->blocked_waitq); @@ -1551,6 +1580,7 @@ int fuse_send_init(struct fuse_mount *fm) int err; if (fm->fc->sync_init) { + ia->args.abort_on_kill = true; err = fuse_simple_request(fm, &ia->args); /* Ignore size of init reply */ if (err > 0) @@ -1622,6 +1652,7 @@ struct fuse_dev *fuse_dev_alloc(void) if (!fud) return NULL; + refcount_set(&fud->ref, 1); pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); if (!pq) { kfree(fud); @@ -1637,9 +1668,26 @@ EXPORT_SYMBOL_GPL(fuse_dev_alloc); void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) { - fud->fc = fuse_conn_get(fc); + struct fuse_conn *old_fc; + spin_lock(&fc->lock); - list_add_tail(&fud->entry, &fc->devices); + /* + * Pairs with: + * - xchg() in 
fuse_dev_release() + * - smp_load_acquire() in fuse_dev_fc_get() + */ + old_fc = cmpxchg(&fud->fc, NULL, fc); + if (old_fc) { + /* + * failed to set fud->fc because + * - it was already set to a different fc + * - it was set to disconnected + */ + fc->connected = 0; + } else { + list_add_tail(&fud->entry, &fc->devices); + fuse_conn_get(fc); + } spin_unlock(&fc->lock); } EXPORT_SYMBOL_GPL(fuse_dev_install); @@ -1657,11 +1705,16 @@ struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) } EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); -void fuse_dev_free(struct fuse_dev *fud) +void fuse_dev_put(struct fuse_dev *fud) { - struct fuse_conn *fc = fud->fc; + struct fuse_conn *fc; - if (fc) { + if (!refcount_dec_and_test(&fud->ref)) + return; + + fc = fuse_dev_fc_get(fud); + if (fc && fc != FUSE_DEV_FC_DISCONNECTED) { + /* This is the virtiofs case (fuse_dev_release() not called) */ spin_lock(&fc->lock); list_del(&fud->entry); spin_unlock(&fc->lock); @@ -1671,7 +1724,7 @@ void fuse_dev_free(struct fuse_dev *fud) kfree(fud->pq.processing); kfree(fud); } -EXPORT_SYMBOL_GPL(fuse_dev_free); +EXPORT_SYMBOL_GPL(fuse_dev_put); static void fuse_fill_attr_from_inode(struct fuse_attr *attr, const struct fuse_inode *fi) @@ -1823,7 +1876,7 @@ EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount); int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) { - struct fuse_dev *fud = NULL; + struct fuse_dev *fud = ctx->fud; struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; struct inode *root; @@ -1857,18 +1910,11 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) goto err; } - if (ctx->fudptr) { - err = -ENOMEM; - fud = fuse_dev_alloc_install(fc); - if (!fud) - goto err_free_dax; - } - fc->dev = sb->s_dev; fm->sb = sb; err = fuse_bdi_init(fc, sb); if (err) - goto err_dev_free; + goto err_free_dax; /* Handle umasking inside the fuse code */ if (sb->s_flags & SB_POSIXACL) @@ -1890,15 +1936,15 @@ int 
fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) set_default_d_op(sb, &fuse_dentry_operations); root_dentry = d_make_root(root); if (!root_dentry) - goto err_dev_free; + goto err_free_dax; mutex_lock(&fuse_mutex); err = -EINVAL; - if (ctx->fudptr && *ctx->fudptr) { - if (*ctx->fudptr == FUSE_DEV_SYNC_INIT) - fc->sync_init = 1; - else + if (fud) { + if (fuse_dev_fc_get(fud)) goto err_unlock; + if (fud->sync_init) + fc->sync_init = 1; } err = fuse_ctl_add_conn(fc); @@ -1907,8 +1953,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - if (ctx->fudptr) { - *ctx->fudptr = fud; + if (fud) { + fuse_dev_install(fud, fc); wake_up_all(&fuse_dev_waitq); } mutex_unlock(&fuse_mutex); @@ -1917,9 +1963,6 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err_unlock: mutex_unlock(&fuse_mutex); dput(root_dentry); - err_dev_free: - if (fud) - fuse_dev_free(fud); err_free_dax: if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); @@ -1934,24 +1977,13 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) struct fuse_mount *fm; int err; - if (!ctx->file || !ctx->rootmode_present || + if (!ctx->fud || !ctx->rootmode_present || !ctx->user_id_present || !ctx->group_id_present) return -EINVAL; - /* - * Require mount to happen from the same user namespace which - * opened /dev/fuse to prevent potential attacks. 
- */ - if ((ctx->file->f_op != &fuse_dev_operations) || - (ctx->file->f_cred->user_ns != sb->s_user_ns)) - return -EINVAL; - ctx->fudptr = &ctx->file->private_data; - err = fuse_fill_super_common(sb, ctx); if (err) return err; - /* file->private_data shall be visible on all CPUs after this */ - smp_mb(); fm = get_fuse_mount_super(sb); @@ -1969,14 +2001,14 @@ static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc) static int fuse_test_super(struct super_block *sb, struct fs_context *fsc) { + struct fuse_dev *fud = fsc->sget_key; - return fsc->sget_key == get_fuse_conn_super(sb); + return fuse_dev_fc_get(fud) == get_fuse_conn_super(sb); } static int fuse_get_tree(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; - struct fuse_dev *fud; struct fuse_conn *fc; struct fuse_mount *fm; struct super_block *sb; @@ -1997,9 +2029,6 @@ static int fuse_get_tree(struct fs_context *fsc) fsc->s_fs_info = fm; - if (ctx->fd_present) - ctx->file = fget(ctx->fd); - if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) { err = get_tree_bdev(fsc, fuse_fill_super); goto out; @@ -2009,16 +2038,15 @@ static int fuse_get_tree(struct fs_context *fsc) * (found by device name), normal fuse mounts can't */ err = -EINVAL; - if (!ctx->file) + if (!ctx->fud) goto out; /* * Allow creating a fuse mount with an already initialized fuse * connection */ - fud = __fuse_get_dev(ctx->file); - if (ctx->file->f_op == &fuse_dev_operations && fud) { - fsc->sget_key = fud->fc; + if (fuse_dev_fc_get(ctx->fud)) { + fsc->sget_key = ctx->fud; sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); err = PTR_ERR_OR_ZERO(sb); if (!IS_ERR(sb)) @@ -2029,8 +2057,6 @@ static int fuse_get_tree(struct fs_context *fsc) out: if (fsc->s_fs_info) fuse_mount_destroy(fm); - if (ctx->file) - fput(ctx->file); return err; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index c2aae2eef086..c88194e52d18 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -52,7 +52,7 @@ static void 
fuse_add_dirent_to_cache(struct file *file, } version = fi->rdc.version; size = fi->rdc.size; - offset = size & ~PAGE_MASK; + offset = offset_in_page(size); index = size >> PAGE_SHIFT; /* Dirent doesn't fit in current page? Jump to next page. */ if (offset + reclen > PAGE_SIZE) { @@ -392,7 +392,7 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, void *addr, unsigned int size, struct dir_context *ctx) { - unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK; + unsigned int offset = offset_in_page(ff->readdir.cache_off); enum fuse_parse_result res = FOUND_NONE; WARN_ON(offset >= size); @@ -518,13 +518,13 @@ retry_locked: index = ff->readdir.cache_off >> PAGE_SHIFT; if (index == (fi->rdc.size >> PAGE_SHIFT)) - size = fi->rdc.size & ~PAGE_MASK; + size = offset_in_page(fi->rdc.size); else size = PAGE_SIZE; spin_unlock(&fi->rdc.lock); /* EOF? */ - if ((ff->readdir.cache_off & ~PAGE_MASK) == size) + if (offset_in_page(ff->readdir.cache_off) == size) return 0; page = find_get_page_flags(file->f_mapping, index, diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 057e65b51b99..12300651a0f1 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -486,7 +486,7 @@ static void virtio_fs_free_devs(struct virtio_fs *fs) if (!fsvq->fud) continue; - fuse_dev_free(fsvq->fud); + fuse_dev_put(fsvq->fud); fsvq->fud = NULL; } } @@ -758,6 +758,27 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) req->argbuf = NULL; } +/* Verify that the server properly follows the FUSE protocol */ +static bool virtio_fs_verify_response(struct fuse_req *req, unsigned int len) +{ + struct fuse_out_header *oh = &req->out.h; + + if (len < sizeof(*oh)) { + pr_warn("virtio-fs: response too short (%u)\n", len); + return false; + } + if (oh->len != len) { + pr_warn("virtio-fs: oh.len mismatch (%u != %u)\n", oh->len, len); + return false; + } + if (oh->unique != req->in.h.unique) { + pr_warn("virtio-fs: oh.unique mismatch (%llu != %llu)\n", 
+ oh->unique, req->in.h.unique); + return false; + } + return true; +} + /* Work function for request completion */ static void virtio_fs_request_complete(struct fuse_req *req, struct virtio_fs_vq *fsvq) @@ -767,10 +788,6 @@ static void virtio_fs_request_complete(struct fuse_req *req, unsigned int len, i, thislen; struct folio *folio; - /* - * TODO verify that server properly follows FUSE protocol - * (oh.uniq, oh.len) - */ args = req->args; copy_args_from_argbuf(args, req); @@ -824,6 +841,10 @@ static void virtio_fs_requests_done_work(struct work_struct *work) virtqueue_disable_cb(vq); while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + if (!virtio_fs_verify_response(req, len)) { + req->out.h.error = -EIO; + req->out.h.len = sizeof(struct fuse_out_header); + } spin_lock(&fpq->lock); list_move_tail(&req->list, &reqs); spin_unlock(&fpq->lock); @@ -1569,8 +1590,6 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) goto err_free_fuse_devs; } - /* virtiofs allocates and installs its own fuse devices */ - ctx->fudptr = NULL; if (ctx->dax_mode != FUSE_DAX_NEVER) { if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { err = -EINVAL;