Files
linux/mm/show_mem.c
Dave Airlie 2232ba9c79 mm: add gpu active/reclaim per-node stat counters (v2)
While discussing memcg intergration with gpu memory allocations,
it was pointed out that there was no numa/system counters for
GPU memory allocations.

With more integrated memory GPU server systems turning up, and
more requirements for memory tracking it seems we should start
closing the gap.

Add two counters to track GPU per-node system memory allocations.

The first is currently allocated to GPU objects, and the second
is for memory that is stored in GPU page pools that can be reclaimed,
by the shrinker.

Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2026-04-08 06:52:47 +10:00

465 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic show_mem() implementation
*
* Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
*/
#include <linux/blkdev.h>
#include <linux/cma.h>
#include <linux/cpuset.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
#include "internal.h"
#include "swap.h"
atomic_long_t _totalram_pages __read_mostly;
EXPORT_SYMBOL(_totalram_pages);
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;
static inline void show_node(struct zone *zone)
{
if (IS_ENABLED(CONFIG_NUMA))
printk("Node %d ", zone_to_nid(zone));
}
long si_mem_available(void)
{
long available;
unsigned long pagecache;
unsigned long wmark_low = 0;
unsigned long reclaimable;
struct zone *zone;
for_each_zone(zone)
wmark_low += low_wmark_pages(zone);
/*
* Estimate the amount of memory available for userspace allocations,
* without causing swapping or OOM.
*/
available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
/*
* Not all the page cache can be freed, otherwise the system will
* start swapping or thrashing. Assume at least half of the page
* cache, or the low watermark worth of cache, needs to stay.
*/
pagecache = global_node_page_state(NR_ACTIVE_FILE) +
global_node_page_state(NR_INACTIVE_FILE);
pagecache -= min(pagecache / 2, wmark_low);
available += pagecache;
/*
* Part of the reclaimable slab and other kernel memory consists of
* items that are in use, and cannot be freed. Cap this estimate at the
* low watermark.
*/
reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
reclaimable -= min(reclaimable / 2, wmark_low);
available += reclaimable;
if (available < 0)
available = 0;
return available;
}
EXPORT_SYMBOL_GPL(si_mem_available);
void si_meminfo(struct sysinfo *val)
{
val->totalram = totalram_pages();
val->sharedram = global_node_page_state(NR_SHMEM);
val->freeram = global_zone_page_state(NR_FREE_PAGES);
val->bufferram = nr_blockdev_pages();
val->totalhigh = totalhigh_pages();
val->freehigh = nr_free_highpages();
val->mem_unit = PAGE_SIZE;
}
EXPORT_SYMBOL(si_meminfo);
#ifdef CONFIG_NUMA
void si_meminfo_node(struct sysinfo *val, int nid)
{
int zone_type; /* needs to be signed */
unsigned long managed_pages = 0;
unsigned long managed_highpages = 0;
unsigned long free_highpages = 0;
pg_data_t *pgdat = NODE_DATA(nid);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
struct zone *zone = &pgdat->node_zones[zone_type];
managed_pages += zone_managed_pages(zone);
if (is_highmem(zone)) {
managed_highpages += zone_managed_pages(zone);
free_highpages += zone_page_state(zone, NR_FREE_PAGES);
}
}
val->totalram = managed_pages;
val->sharedram = node_page_state(pgdat, NR_SHMEM);
val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
val->totalhigh = managed_highpages;
val->freehigh = free_highpages;
val->mem_unit = PAGE_SIZE;
}
#endif
/*
* Determine whether the node should be displayed or not, depending on whether
* SHOW_MEM_FILTER_NODES was passed to show_free_areas().
*/
static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
{
if (!(flags & SHOW_MEM_FILTER_NODES))
return false;
/*
* no node mask - aka implicit memory numa policy. Do not bother with
* the synchronization - read_mems_allowed_begin - because we do not
* have to be precise here.
*/
if (!nodemask)
nodemask = &cpuset_current_mems_allowed;
return !node_isset(nid, *nodemask);
}
static void show_migration_types(unsigned char type)
{
static const char types[MIGRATE_TYPES] = {
[MIGRATE_UNMOVABLE] = 'U',
[MIGRATE_MOVABLE] = 'M',
[MIGRATE_RECLAIMABLE] = 'E',
[MIGRATE_HIGHATOMIC] = 'H',
#ifdef CONFIG_CMA
[MIGRATE_CMA] = 'C',
#endif
#ifdef CONFIG_MEMORY_ISOLATION
[MIGRATE_ISOLATE] = 'I',
#endif
};
char tmp[MIGRATE_TYPES + 1];
char *p = tmp;
int i;
for (i = 0; i < MIGRATE_TYPES; i++) {
if (type & (1 << i))
*p++ = types[i];
}
*p = '\0';
printk(KERN_CONT "(%s) ", tmp);
}
static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
{
int zone_idx;
for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
if (zone_managed_pages(pgdat->node_zones + zone_idx))
return true;
return false;
}
/*
* Show free area list (used inside shift_scroll-lock stuff)
* We also calculate the percentage fragmentation. We do this by counting the
* memory on each free list with the exception of the first item on the list.
*
* Bits in @filter:
* SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
* cpuset.
*/
static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
{
unsigned long free_pcp = 0;
int cpu, nid;
struct zone *zone;
pg_data_t *pgdat;
for_each_populated_zone(zone) {
if (zone_idx(zone) > max_zone_idx)
continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
for_each_online_cpu(cpu)
free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
}
printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu dirty:%lu writeback:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu\n"
" sec_pagetables:%lu bounce:%lu\n"
" kernel_misc_reclaimable:%lu\n"
" free:%lu free_pcp:%lu free_cma:%lu\n",
global_node_page_state(NR_ACTIVE_ANON),
global_node_page_state(NR_INACTIVE_ANON),
global_node_page_state(NR_ISOLATED_ANON),
global_node_page_state(NR_ACTIVE_FILE),
global_node_page_state(NR_INACTIVE_FILE),
global_node_page_state(NR_ISOLATED_FILE),
global_node_page_state(NR_UNEVICTABLE),
global_node_page_state(NR_FILE_DIRTY),
global_node_page_state(NR_WRITEBACK),
global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
global_node_page_state(NR_FILE_MAPPED),
global_node_page_state(NR_SHMEM),
global_node_page_state(NR_PAGETABLE),
global_node_page_state(NR_SECONDARY_PAGETABLE),
0UL,
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
global_zone_page_state(NR_FREE_PAGES),
free_pcp,
global_zone_page_state(NR_FREE_CMA_PAGES));
for_each_online_pgdat(pgdat) {
if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
continue;
if (!node_has_managed_zones(pgdat, max_zone_idx))
continue;
printk("Node %d"
" active_anon:%lukB"
" inactive_anon:%lukB"
" active_file:%lukB"
" inactive_file:%lukB"
" unevictable:%lukB"
" isolated(anon):%lukB"
" isolated(file):%lukB"
" mapped:%lukB"
" dirty:%lukB"
" writeback:%lukB"
" shmem:%lukB"
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
" shmem_thp:%lukB"
" shmem_pmdmapped:%lukB"
" anon_thp:%lukB"
#endif
" kernel_stack:%lukB"
#ifdef CONFIG_SHADOW_CALL_STACK
" shadow_call_stack:%lukB"
#endif
" pagetables:%lukB"
" sec_pagetables:%lukB"
" all_unreclaimable? %s"
" Balloon:%lukB"
" gpu_active:%lukB"
" gpu_reclaim:%lukB"
"\n",
pgdat->node_id,
K(node_page_state(pgdat, NR_ACTIVE_ANON)),
K(node_page_state(pgdat, NR_INACTIVE_ANON)),
K(node_page_state(pgdat, NR_ACTIVE_FILE)),
K(node_page_state(pgdat, NR_INACTIVE_FILE)),
K(node_page_state(pgdat, NR_UNEVICTABLE)),
K(node_page_state(pgdat, NR_ISOLATED_ANON)),
K(node_page_state(pgdat, NR_ISOLATED_FILE)),
K(node_page_state(pgdat, NR_FILE_MAPPED)),
K(node_page_state(pgdat, NR_FILE_DIRTY)),
K(node_page_state(pgdat, NR_WRITEBACK)),
K(node_page_state(pgdat, NR_SHMEM)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
K(node_page_state(pgdat, NR_SHMEM_THPS)),
K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
K(node_page_state(pgdat, NR_ANON_THPS)),
#endif
node_page_state(pgdat, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
node_page_state(pgdat, NR_KERNEL_SCS_KB),
#endif
K(node_page_state(pgdat, NR_PAGETABLE)),
K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
str_yes_no(kswapd_test_hopeless(pgdat)),
K(node_page_state(pgdat, NR_BALLOON_PAGES)),
K(node_page_state(pgdat, NR_GPU_ACTIVE)),
K(node_page_state(pgdat, NR_GPU_RECLAIM)));
}
for_each_populated_zone(zone) {
int i;
if (zone_idx(zone) > max_zone_idx)
continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
free_pcp = 0;
for_each_online_cpu(cpu)
free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
show_node(zone);
printk(KERN_CONT
"%s"
" free:%lukB"
" boost:%lukB"
" min:%lukB"
" low:%lukB"
" high:%lukB"
" reserved_highatomic:%luKB"
" free_highatomic:%luKB"
" active_anon:%lukB"
" inactive_anon:%lukB"
" active_file:%lukB"
" inactive_file:%lukB"
" unevictable:%lukB"
" writepending:%lukB"
" zspages:%lukB"
" present:%lukB"
" managed:%lukB"
" mlocked:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
" local_pcp:%ukB"
" free_cma:%lukB"
"\n",
zone->name,
K(zone_page_state(zone, NR_FREE_PAGES)),
K(zone->watermark_boost),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
K(zone->nr_reserved_highatomic),
K(zone->nr_free_highatomic),
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
#if IS_ENABLED(CONFIG_ZSMALLOC)
K(zone_page_state(zone, NR_ZSPAGES)),
#else
0UL,
#endif
K(zone->present_pages),
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
0UL,
K(free_pcp),
K(this_cpu_read(zone->per_cpu_pageset->count)),
K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
printk(KERN_CONT "\n");
}
for_each_populated_zone(zone) {
unsigned int order;
unsigned long nr[NR_PAGE_ORDERS], flags, total = 0;
unsigned char types[NR_PAGE_ORDERS];
if (zone_idx(zone) > max_zone_idx)
continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
show_node(zone);
printk(KERN_CONT "%s: ", zone->name);
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < NR_PAGE_ORDERS; order++) {
struct free_area *area = &zone->free_area[order];
int type;
nr[order] = area->nr_free;
total += nr[order] << order;
types[order] = 0;
for (type = 0; type < MIGRATE_TYPES; type++) {
if (!free_area_empty(area, type))
types[order] |= 1 << type;
}
}
spin_unlock_irqrestore(&zone->lock, flags);
for (order = 0; order < NR_PAGE_ORDERS; order++) {
printk(KERN_CONT "%lu*%lukB ",
nr[order], K(1UL) << order);
if (nr[order])
show_migration_types(types[order]);
}
printk(KERN_CONT "= %lukB\n", K(total));
}
for_each_online_node(nid) {
if (show_mem_node_skip(filter, nid, nodemask))
continue;
hugetlb_show_meminfo_node(nid);
}
printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
show_swap_cache_info();
}
void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
{
unsigned long total = 0, reserved = 0, highmem = 0;
struct zone *zone;
printk("Mem-Info:\n");
show_free_areas(filter, nodemask, max_zone_idx);
for_each_populated_zone(zone) {
total += zone->present_pages;
reserved += zone->present_pages - zone_managed_pages(zone);
if (is_highmem(zone))
highmem += zone->present_pages;
}
printk("%lu pages RAM\n", total);
printk("%lu pages HighMem/MovableOnly\n", highmem);
printk("%lu pages reserved\n", reserved);
#ifdef CONFIG_CMA
printk("%lu pages cma reserved\n", totalcma_pages);
#endif
#ifdef CONFIG_MEMORY_FAILURE
printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
#endif
#ifdef CONFIG_MEM_ALLOC_PROFILING
static DEFINE_SPINLOCK(mem_alloc_profiling_spinlock);
if (spin_trylock(&mem_alloc_profiling_spinlock)) {
struct codetag_bytes tags[10];
size_t i, nr;
nr = alloc_tag_top_users(tags, ARRAY_SIZE(tags), false);
if (nr) {
pr_notice("Memory allocations (profiling is currently turned %s):\n",
mem_alloc_profiling_enabled() ? "on" : "off");
for (i = 0; i < nr; i++) {
struct codetag *ct = tags[i].ct;
struct alloc_tag *tag = ct_to_alloc_tag(ct);
struct alloc_tag_counters counter = alloc_tag_read(tag);
char bytes[10];
string_get_size(counter.bytes, 1, STRING_UNITS_2, bytes, sizeof(bytes));
/* Same as alloc_tag_to_text() but w/o intermediate buffer */
if (ct->modname)
pr_notice("%12s %8llu %s:%u [%s] func:%s\n",
bytes, counter.calls, ct->filename,
ct->lineno, ct->modname, ct->function);
else
pr_notice("%12s %8llu %s:%u func:%s\n",
bytes, counter.calls, ct->filename,
ct->lineno, ct->function);
}
}
spin_unlock(&mem_alloc_profiling_spinlock);
}
#endif
}