mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Under memory pressure, direct reclaim can kick in during compressed readahead. This puts the associated task into D-state. Then shrink_lruvec() disables interrupts when acquiring the LRU lock. Under heavy pressure, we've observed reclaim can run long enough that the CPU becomes prone to CSD lock stalls since it cannot service incoming IPIs. Although the CSD lock stalls are the worst case scenario, we have found many more subtle occurrences of this latency on the order of seconds, over a minute in some cases. Prevent direct reclaim during compressed readahead. This is achieved by using different GFP flags at key points when the bio is marked for readahead. There are two functions that allocate during compressed readahead: btrfs_alloc_compr_folio() and add_ra_bio_pages(). Both currently use GFP_NOFS which includes __GFP_DIRECT_RECLAIM. For the internal API call btrfs_alloc_compr_folio(), the signature changes to accept an additional gfp_t parameter. At the readahead call site, it gets flags similar to GFP_NOFS but stripped of __GFP_DIRECT_RECLAIM. __GFP_NOWARN is added since these allocations are allowed to fail. Demand reads still use full GFP_NOFS and will enter reclaim if needed. All other existing call sites of btrfs_alloc_compr_folio() now explicitly pass GFP_NOFS to retain their current behavior. add_ra_bio_pages() gains a bool parameter which allows callers to specify if they want to allow direct reclaim or not. In either case, the __GFP_NOWARN flag was added unconditionally since the allocations are speculative. There has been some previous work done on calling add_ra_bio_pages() [0]. This patch is complementary: where that patch reduces call frequency, this patch reduces the latency associated with those calls. 
[0] https://lore.kernel.org/linux-btrfs/656838ec1232314a2657716e59f4f15a8eadba64.1751492111.git.boris@bur.io/ Reviewed-by: Mark Harmstone <mark@harmstone.com> Reviewed-by: Qu Wenruo <wqu@suse.com> Signed-off-by: JP Kobryn (Meta) <jp.kobryn@linux.dev> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
184 lines
6.4 KiB
C
184 lines
6.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (C) 2008 Oracle. All rights reserved.
|
|
*/
|
|
|
|
#ifndef BTRFS_COMPRESSION_H
|
|
#define BTRFS_COMPRESSION_H
|
|
|
|
#include <linux/sizes.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/list.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/pagemap.h>
|
|
#include "bio.h"
|
|
#include "fs.h"
|
|
#include "btrfs_inode.h"
|
|
|
|
struct address_space;
|
|
struct inode;
|
|
struct btrfs_inode;
|
|
struct btrfs_ordered_extent;
|
|
|
|
/*
|
|
* We want to make sure that amount of RAM required to uncompress an extent is
|
|
* reasonable, so we limit the total size in ram of a compressed extent to
|
|
* 128k. This is a crucial number because it also controls how easily we can
|
|
* spread reads across cpus for decompression.
|
|
*
|
|
* We also want to make sure the amount of IO required to do a random read is
|
|
* reasonably small, so we limit the size of a compressed extent to 128k.
|
|
*/
|
|
|
|
/* Maximum length of compressed data stored on disk */
|
|
#define BTRFS_MAX_COMPRESSED (SZ_128K)
|
|
#define BTRFS_MAX_COMPRESSED_PAGES (BTRFS_MAX_COMPRESSED / PAGE_SIZE)
|
|
static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
|
|
|
|
/* The max size for a single worker to compress. */
|
|
#define BTRFS_COMPRESSION_CHUNK_SIZE (SZ_512K)
|
|
|
|
/* Maximum size of data before compression */
|
|
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
|
|
|
|
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
|
|
|
/*
 * State for one compressed extent in flight: the byte range of the inode it
 * covers, the algorithm used, and the bio carrying the compressed folios.
 */
struct compressed_bio {
	/* starting offset in the inode for our pages */
	u64 start;

	/* Number of bytes in the inode we're working on */
	unsigned int len;

	/* The compression algorithm for this bio */
	u8 compress_type;

	/* Whether this is a write for writeback. */
	bool writeback;

	/* For reads, this is the bio we are copying the data into. */
	struct btrfs_bio *orig_bbio;

	/*
	 * Must be last.  cb_to_fs_info() and the bio completion paths rely on
	 * the btrfs_bio being embedded here; presumably it must stay at the
	 * end because of the bio's variable-size tail -- TODO confirm.
	 */
	struct btrfs_bio bbio;
};
|
|
|
|
/* Return the fs_info of the filesystem this compressed bio belongs to. */
static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb)
{
	return cb->bbio.inode->root->fs_info;
}
|
|
|
|
/* @range_end must be exclusive. */
|
|
static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur)
|
|
{
|
|
/* @cur must be inside the folio. */
|
|
ASSERT(folio_pos(folio) <= cur);
|
|
ASSERT(cur < folio_next_pos(folio));
|
|
return umin(range_end, folio_next_pos(folio)) - cur;
|
|
}
|
|
|
|
int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info);
|
|
void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info);
|
|
|
|
int __init btrfs_init_compress(void);
|
|
void __cold btrfs_exit_compress(void);
|
|
|
|
bool btrfs_compress_level_valid(unsigned int type, int level);
|
|
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
|
|
unsigned long dest_pgoff, size_t srclen, size_t destlen);
|
|
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
|
|
struct compressed_bio *cb, u32 decompressed);
|
|
|
|
struct compressed_bio *btrfs_alloc_compressed_write(struct btrfs_inode *inode,
|
|
u64 start, u64 len);
|
|
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
|
|
struct compressed_bio *cb);
|
|
void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
|
|
|
|
int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret);
|
|
|
|
/*
 * Allocate/free a folio for compressed data.
 *
 * @gfp lets callers control allocation behavior: readahead passes GFP_NOFS
 * stripped of __GFP_DIRECT_RECLAIM (plus __GFP_NOWARN, as those speculative
 * allocations may fail) to avoid stalling in direct reclaim, while demand
 * reads and all other callers pass full GFP_NOFS.
 */
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info, gfp_t gfp);
void btrfs_free_compr_folio(struct folio *folio);
|
|
|
|
/*
 * Bookkeeping for a pool of compression workspaces.
 *
 * Idle workspaces sit on @idle_ws; tasks sleep on @ws_wait while waiting
 * for one to become free.
 */
struct workspace_manager {
	/* List of currently idle workspaces */
	struct list_head idle_ws;
	/* Lock for this structure (presumably guards idle_ws/free_ws -- confirm) */
	spinlock_t ws_lock;
	/* Number of free workspaces */
	int free_ws;
	/* Total number of allocated workspaces */
	atomic_t total_ws;
	/* Waiters for a free workspace */
	wait_queue_head_t ws_wait;
};
|
|
|
|
struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level);
|
|
void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws);
|
|
|
|
/* Supported compression level range and default for one algorithm. */
struct btrfs_compress_levels {
	/* Minimum level supported by the compression algorithm */
	int min_level;
	/* Maximum level supported by the compression algorithm */
	int max_level;
	/* Level used when the caller does not specify one */
	int default_level;
};
|
|
|
|
/* The heuristic workspaces are managed via the 0th workspace manager */
|
|
#define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES
|
|
|
|
extern const struct btrfs_compress_levels btrfs_heuristic_compress;
|
|
extern const struct btrfs_compress_levels btrfs_zlib_compress;
|
|
extern const struct btrfs_compress_levels btrfs_lzo_compress;
|
|
extern const struct btrfs_compress_levels btrfs_zstd_compress;
|
|
|
|
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
|
|
bool btrfs_compress_is_valid_type(const char *str, size_t len);
|
|
|
|
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end);
|
|
|
|
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
|
|
struct folio **in_folio_ret);
|
|
struct compressed_bio *btrfs_compress_bio(struct btrfs_inode *inode,
|
|
u64 start, u32 len, unsigned int type,
|
|
int level, blk_opf_t write_flags);
|
|
|
|
/*
 * Release every folio attached to the compressed bio (returning each via
 * btrfs_free_compr_folio()), then drop the bio reference itself.
 */
static inline void cleanup_compressed_bio(struct compressed_bio *cb)
{
	struct bio *bio = &cb->bbio.bio;
	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio)
		btrfs_free_compr_folio(fi.folio);
	bio_put(bio);
}
|
|
|
|
int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
int zlib_decompress(struct list_head *ws, const u8 *data_in,
|
|
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
|
|
size_t destlen);
|
|
struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level);
|
|
void zlib_free_workspace(struct list_head *ws);
|
|
struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level);
|
|
|
|
int lzo_compress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
int lzo_decompress(struct list_head *ws, const u8 *data_in,
|
|
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
|
|
size_t destlen);
|
|
struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info);
|
|
void lzo_free_workspace(struct list_head *ws);
|
|
|
|
int zstd_compress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
int zstd_decompress(struct list_head *ws, const u8 *data_in,
|
|
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
|
|
size_t destlen);
|
|
int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info);
|
|
void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info);
|
|
struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level);
|
|
void zstd_free_workspace(struct list_head *ws);
|
|
struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level);
|
|
void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws);
|
|
|
|
#endif
|