mm/sparse: move memory hotplug bits to sparse-vmemmap.c

Let's move all memory hotplug related code to sparse-vmemmap.c.

We only have to expose sparse_index_init().  While at it, drop the
definition of sparse_index_init() for !CONFIG_SPARSEMEM, which is unused,
and place the declaration in internal.h.

Link: https://lkml.kernel.org/r/20260320-sparsemem_cleanups-v2-15-096addc8800d@kernel.org
Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
David Hildenbrand (Arm)
2026-03-20 23:13:47 +01:00
committed by Andrew Morton
parent 08e5f77c37
commit 738de20c4f
4 changed files with 310 additions and 309 deletions

View File

@@ -2370,7 +2370,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
#endif
#else
#define sparse_index_init(_sec, _nid) do {} while (0)
#define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
#define sparse_vmemmap_init_nid_late(_nid) do {} while (0)
#define pfn_in_present_section pfn_valid

View File

@@ -964,6 +964,7 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
*/
#ifdef CONFIG_SPARSEMEM
void sparse_init(void);
int sparse_index_init(unsigned long section_nr, int nid);
static inline void sparse_init_one_section(struct mem_section *ms,
unsigned long pnum, struct page *mem_map,
@@ -999,6 +1000,9 @@ static inline void __section_mark_present(struct mem_section *ms,
static inline void sparse_init(void) {}
#endif /* CONFIG_SPARSEMEM */
/*
* mm/sparse-vmemmap.c
*/
#ifdef CONFIG_SPARSEMEM_VMEMMAP
void sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages);
#else

View File

@@ -591,3 +591,307 @@ void __init sparse_vmemmap_init_nid_late(int nid)
hugetlb_vmemmap_init_late(nid);
}
#endif
/*
 * Set, in @map, the run of bits from subsection_map_index(@pfn) up to and
 * including subsection_map_index(@pfn + @nr_pages - 1).
 */
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
		unsigned long nr_pages)
{
	int first_bit = subsection_map_index(pfn);
	int last_bit = subsection_map_index(pfn + nr_pages - 1);
	int nbits = last_bit - first_bit + 1;

	bitmap_set(map, first_bit, nbits);
}
/*
 * sparse_init_subsection_map - mark a pfn range in the subsection maps
 * @pfn: first pfn of the range
 * @nr_pages: number of pages in the range
 *
 * Walks every section from the one containing @pfn to the one containing the
 * last pfn of the range. For each section, the part of the range that lies
 * within it is set in that section's subsection map.
 */
void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	/*
	 * Keep the loop bound in the same type as @nr and @start_sec_nr so the
	 * section number is neither truncated nor sign-extended in the
	 * comparison below.
	 */
	unsigned long end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
	unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);

	for (nr = start_sec_nr; nr <= end_sec_nr; nr++) {
		struct mem_section *ms;
		unsigned long pfns;

		/*
		 * Take what is left of the range, but never more than
		 * PAGES_PER_SECTION minus (pfn & ~PAGE_SECTION_MASK).
		 */
		pfns = min(nr_pages, PAGES_PER_SECTION
				- (pfn & ~PAGE_SECTION_MASK));
		ms = __nr_to_section(nr);
		/*
		 * NOTE(review): ms->usage is dereferenced unchecked; presumably
		 * the caller guarantees it was set up earlier - confirm.
		 */
		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);

		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
				pfns, subsection_map_index(pfn),
				subsection_map_index(pfn + pfns - 1));

		pfn += pfns;
		nr_pages -= pfns;
	}
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Set SECTION_IS_ONLINE on the section of every pfn visited when stepping
 * from @start_pfn up to (but excluding) @end_pfn in PAGES_PER_SECTION strides.
 */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long cur = start_pfn;

	while (cur < end_pfn) {
		struct mem_section *sec = __nr_to_section(pfn_to_section_nr(cur));

		sec->section_mem_map |= SECTION_IS_ONLINE;
		cur += PAGES_PER_SECTION;
	}
}
/*
 * Clear SECTION_IS_ONLINE on the section of every pfn visited when stepping
 * from @start_pfn up to (but excluding) @end_pfn in PAGES_PER_SECTION strides.
 */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long cur = start_pfn;

	while (cur < end_pfn) {
		struct mem_section *sec = __nr_to_section(pfn_to_section_nr(cur));

		sec->section_mem_map &= ~SECTION_IS_ONLINE;
		cur += PAGES_PER_SECTION;
	}
}
/*
 * Hand all arguments unchanged to __populate_section_memmap() and return
 * whatever it returns.
 */
static struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	struct page *memmap;

	memmap = __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
	return memmap;
}
/*
 * Release, via vmemmap_free(), the address range that holds the struct pages
 * for [@pfn, @pfn + @nr_pages). @altmap is passed through unchanged.
 */
static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct page *first = pfn_to_page(pfn);
	unsigned long base = (unsigned long)first;
	unsigned long len = nr_pages * sizeof(struct page);

	vmemmap_free(base, base + len, altmap);
}
/*
 * Release, via vmemmap_free() and without an altmap, the PAGES_PER_SECTION
 * struct pages that start at @memmap.
 */
static void free_map_bootmem(struct page *memmap)
{
	struct page *memmap_end = memmap + PAGES_PER_SECTION;

	vmemmap_free((unsigned long)memmap, (unsigned long)memmap_end, NULL);
}
static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
struct mem_section *ms = __pfn_to_section(pfn);
unsigned long *subsection_map = ms->usage
? &ms->usage->subsection_map[0] : NULL;
subsection_mask_set(map, pfn, nr_pages);
if (subsection_map)
bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
"section already deactivated (%#lx + %ld)\n",
pfn, nr_pages))
return -EINVAL;
bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
return 0;
}
/*
 * Report whether no bit is set in the first SUBSECTIONS_PER_SECTION bits of
 * the subsection map of @ms. @ms->usage is dereferenced unconditionally.
 */
static bool is_subsection_map_empty(struct mem_section *ms)
{
	const unsigned long *subsection_map = &ms->usage->subsection_map[0];

	return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
}
static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
struct mem_section *ms = __pfn_to_section(pfn);
DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
unsigned long *subsection_map;
int rc = 0;
subsection_mask_set(map, pfn, nr_pages);
subsection_map = &ms->usage->subsection_map[0];
if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
rc = -EINVAL;
else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
rc = -EEXIST;
else
bitmap_or(subsection_map, map, subsection_map,
SUBSECTIONS_PER_SECTION);
return rc;
}
/*
 * section_deactivate - deactivate a pfn range within one memory section
 * @pfn: first pfn of the range
 * @nr_pages: number of pages in the range
 * @altmap: passed through to depopulate_section_memmap()
 *
 * To deactivate a memory region, there are 3 cases to handle:
 *
 * 1. deactivation of a partial hot-added section:
 *    a) section was present at memory init.
 *    b) section was hot-added post memory init.
 * 2. deactivation of a complete hot-added section.
 * 3. deactivation of a complete section from memory init.
 *
 * For 1, when the subsection_map is not empty we will not be freeing the
 * usage map, but still need to free the vmemmap range.
 *
 * Does nothing if clear_subsection_map() rejects the range.
 */
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
struct mem_section *ms = __pfn_to_section(pfn);
/* Evaluated once, before any section state is modified below. */
bool section_is_early = early_section(ms);
/* Stays NULL unless the whole section ends up without active subsections. */
struct page *memmap = NULL;
bool empty;
if (clear_subsection_map(pfn, nr_pages))
return;
empty = is_subsection_map_empty(ms);
if (empty) {
/*
 * Mark the section invalid so that valid_section()
 * returns false. This prevents code from dereferencing
 * the ms->usage array.
 */
ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
/*
 * When removing an early section, the usage map is kept (as the
 * usage maps of other sections fall into the same page). It
 * will be re-used when re-adding the section - which is then no
 * longer an early section. If the usage map is PageReserved, it
 * was allocated during boot.
 */
if (!PageReserved(virt_to_page(ms->usage))) {
kfree_rcu(ms->usage, rcu);
WRITE_ONCE(ms->usage, NULL);
}
memmap = pfn_to_page(SECTION_ALIGN_DOWN(pfn));
}
/*
 * The memmap of early sections is always fully populated. See
 * section_activate() and pfn_valid() .
 *
 * Non-early sections give back just the deactivated range. Early
 * sections give back the whole section's memmap, and only once the
 * subsection map is empty (memmap != NULL).
 */
if (!section_is_early) {
memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
depopulate_section_memmap(pfn, nr_pages, altmap);
} else if (memmap) {
memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
PAGE_SIZE)));
free_map_bootmem(memmap);
}
/* Reset the remaining section_mem_map state only after the memmap is released. */
if (empty)
ms->section_mem_map = (unsigned long)NULL;
}
/*
 * section_activate - activate a pfn range within one memory section
 * @nid: node id passed to populate_section_memmap()
 * @pfn: first pfn of the range
 * @nr_pages: number of pages in the range
 * @altmap: passed through to populate_section_memmap() and, on failure, to
 *          section_deactivate()
 * @pgmap: passed through to populate_section_memmap()
 *
 * Allocates a usage map if the section has none, sets the range's bits in the
 * subsection map and populates the memmap. A partial range inside an early
 * section reuses the existing memmap instead of populating one.
 *
 * Returns the struct page pointer to use for the range, or an ERR_PTR():
 * -ENOMEM if the usage map or the memmap cannot be allocated, otherwise the
 * error returned by fill_subsection_map().
 */
static struct page * __meminit section_activate(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap)
{
struct mem_section *ms = __pfn_to_section(pfn);
/* Non-NULL only if the usage map was allocated by this call. */
struct mem_section_usage *usage = NULL;
struct page *memmap;
int rc;
if (!ms->usage) {
usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
if (!usage)
return ERR_PTR(-ENOMEM);
ms->usage = usage;
}
rc = fill_subsection_map(pfn, nr_pages);
if (rc) {
/* Only undo a usage map installed above; a pre-existing one is kept. */
if (usage)
ms->usage = NULL;
kfree(usage);
return ERR_PTR(rc);
}
/*
 * The early init code does not consider partially populated
 * initial sections, it simply assumes that memory will never be
 * referenced. If we hot-add memory into such a section then we
 * do not need to populate the memmap and can simply reuse what
 * is already there.
 */
if (nr_pages < PAGES_PER_SECTION && early_section(ms))
return pfn_to_page(pfn);
memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
if (!memmap) {
/* Roll back the subsection map update made above. */
section_deactivate(pfn, nr_pages, altmap);
return ERR_PTR(-ENOMEM);
}
memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
return memmap;
}
/**
 * sparse_add_section - add a memory section, or populate an existing one
 * @nid: The node to add section on
 * @start_pfn: start pfn of the memory range
 * @nr_pages: number of pfns to add in the section
 * @altmap: alternate pfns to allocate the memmap backing store
 * @pgmap: alternate compound page geometry for devmap mappings
 *
 * This is only intended for hotplug.
 *
 * Note that only VMEMMAP supports sub-section aligned hotplug,
 * the proper alignment and size are gated by check_pfn_span().
 *
 * Return:
 * * 0		- On success.
 * * -EEXIST	- Section has been present.
 * * -EINVAL	- The range yields no subsection bits, as reported by
 *		  fill_subsection_map().
 * * -ENOMEM	- Out of memory.
 *
 * A negative value returned by sparse_index_init() is passed on unchanged.
 */
int __meminit sparse_add_section(int nid, unsigned long start_pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	const unsigned long sec_nr = pfn_to_section_nr(start_pfn);
	unsigned long sec_start_pfn;
	struct mem_section *section;
	struct page *map;
	int err;

	err = sparse_index_init(sec_nr, nid);
	if (err < 0)
		return err;

	map = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/*
	 * Poison the not-yet-initialized struct pages so that invalid flag
	 * combinations are caught.
	 */
	page_init_poison(map, sizeof(struct page) * nr_pages);

	section = __nr_to_section(sec_nr);
	__section_mark_present(section, sec_nr);

	/* In the subsection case, use the memmap of the section's first pfn. */
	sec_start_pfn = section_nr_to_pfn(sec_nr);
	if (sec_start_pfn != start_pfn)
		map = pfn_to_page(sec_start_pfn);

	sparse_init_one_section(section, sec_nr, map, section->usage, 0);
	return 0;
}
/*
 * Deactivate [@pfn, @pfn + @nr_pages) in the section containing @pfn.
 * Warns once and does nothing if that section is not valid.
 */
void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct mem_section *ms = __pfn_to_section(pfn);

	if (!WARN_ON_ONCE(!valid_section(ms)))
		section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

View File

@@ -79,7 +79,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid)
return section;
}
static int __meminit sparse_index_init(unsigned long section_nr, int nid)
int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
unsigned long root = SECTION_NR_TO_ROOT(section_nr);
struct mem_section *section;
@@ -103,7 +103,7 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
return 0;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
int sparse_index_init(unsigned long section_nr, int nid)
{
return 0;
}
@@ -167,40 +167,6 @@ static inline unsigned long first_present_section_nr(void)
return next_present_section_nr(-1);
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Set, in @map, the run of bits from subsection_map_index(@pfn) up to and
 * including subsection_map_index(@pfn + @nr_pages - 1).
 */
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
		unsigned long nr_pages)
{
	int first_bit = subsection_map_index(pfn);
	int last_bit = subsection_map_index(pfn + nr_pages - 1);
	int nbits = last_bit - first_bit + 1;

	bitmap_set(map, first_bit, nbits);
}
/*
 * sparse_init_subsection_map - mark a pfn range in the subsection maps
 * @pfn: first pfn of the range
 * @nr_pages: number of pages in the range
 *
 * Walks every section from the one containing @pfn to the one containing the
 * last pfn of the range. For each section, the part of the range that lies
 * within it is set in that section's subsection map.
 */
void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	/*
	 * Keep the loop bound in the same type as @nr and @start_sec_nr so the
	 * section number is neither truncated nor sign-extended in the
	 * comparison below.
	 */
	unsigned long end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
	unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);

	for (nr = start_sec_nr; nr <= end_sec_nr; nr++) {
		struct mem_section *ms;
		unsigned long pfns;

		/*
		 * Take what is left of the range, but never more than
		 * PAGES_PER_SECTION minus (pfn & ~PAGE_SECTION_MASK).
		 */
		pfns = min(nr_pages, PAGES_PER_SECTION
				- (pfn & ~PAGE_SECTION_MASK));
		ms = __nr_to_section(nr);
		/*
		 * NOTE(review): ms->usage is dereferenced unchecked; presumably
		 * the caller guarantees it was set up earlier - confirm.
		 */
		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);

		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
				pfns, subsection_map_index(pfn),
				subsection_map_index(pfn + pfns - 1));

		pfn += pfns;
		nr_pages -= pfns;
	}
}
#endif
/* Record a memory area against a node. */
static void __init memory_present(int nid, unsigned long start, unsigned long end)
{
@@ -482,275 +448,3 @@ void __init sparse_init(void)
sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
vmemmap_populate_print_last();
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Set SECTION_IS_ONLINE on the section of every pfn visited when stepping
 * from @start_pfn up to (but excluding) @end_pfn in PAGES_PER_SECTION strides.
 */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long cur = start_pfn;

	while (cur < end_pfn) {
		struct mem_section *sec = __nr_to_section(pfn_to_section_nr(cur));

		sec->section_mem_map |= SECTION_IS_ONLINE;
		cur += PAGES_PER_SECTION;
	}
}
/*
 * Clear SECTION_IS_ONLINE on the section of every pfn visited when stepping
 * from @start_pfn up to (but excluding) @end_pfn in PAGES_PER_SECTION strides.
 */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long cur = start_pfn;

	while (cur < end_pfn) {
		struct mem_section *sec = __nr_to_section(pfn_to_section_nr(cur));

		sec->section_mem_map &= ~SECTION_IS_ONLINE;
		cur += PAGES_PER_SECTION;
	}
}
/*
 * Hand all arguments unchanged to __populate_section_memmap() and return
 * whatever it returns.
 */
static struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	struct page *memmap;

	memmap = __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
	return memmap;
}
/*
 * Release, via vmemmap_free(), the address range that holds the struct pages
 * for [@pfn, @pfn + @nr_pages). @altmap is passed through unchanged.
 */
static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct page *first = pfn_to_page(pfn);
	unsigned long base = (unsigned long)first;
	unsigned long len = nr_pages * sizeof(struct page);

	vmemmap_free(base, base + len, altmap);
}
/*
 * Release, via vmemmap_free() and without an altmap, the PAGES_PER_SECTION
 * struct pages that start at @memmap.
 */
static void free_map_bootmem(struct page *memmap)
{
	struct page *memmap_end = memmap + PAGES_PER_SECTION;

	vmemmap_free((unsigned long)memmap, (unsigned long)memmap_end, NULL);
}
static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
struct mem_section *ms = __pfn_to_section(pfn);
unsigned long *subsection_map = ms->usage
? &ms->usage->subsection_map[0] : NULL;
subsection_mask_set(map, pfn, nr_pages);
if (subsection_map)
bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
"section already deactivated (%#lx + %ld)\n",
pfn, nr_pages))
return -EINVAL;
bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
return 0;
}
/*
 * Report whether no bit is set in the first SUBSECTIONS_PER_SECTION bits of
 * the subsection map of @ms. @ms->usage is dereferenced unconditionally.
 */
static bool is_subsection_map_empty(struct mem_section *ms)
{
	const unsigned long *subsection_map = &ms->usage->subsection_map[0];

	return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
}
static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
struct mem_section *ms = __pfn_to_section(pfn);
DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
unsigned long *subsection_map;
int rc = 0;
subsection_mask_set(map, pfn, nr_pages);
subsection_map = &ms->usage->subsection_map[0];
if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
rc = -EINVAL;
else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
rc = -EEXIST;
else
bitmap_or(subsection_map, map, subsection_map,
SUBSECTIONS_PER_SECTION);
return rc;
}
/*
 * section_deactivate - deactivate a pfn range within one memory section
 * @pfn: first pfn of the range
 * @nr_pages: number of pages in the range
 * @altmap: passed through to depopulate_section_memmap()
 *
 * To deactivate a memory region, there are 3 cases to handle:
 *
 * 1. deactivation of a partial hot-added section:
 *    a) section was present at memory init.
 *    b) section was hot-added post memory init.
 * 2. deactivation of a complete hot-added section.
 * 3. deactivation of a complete section from memory init.
 *
 * For 1, when the subsection_map is not empty we will not be freeing the
 * usage map, but still need to free the vmemmap range.
 *
 * Does nothing if clear_subsection_map() rejects the range.
 */
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
struct mem_section *ms = __pfn_to_section(pfn);
/* Evaluated once, before any section state is modified below. */
bool section_is_early = early_section(ms);
/* Stays NULL unless the whole section ends up without active subsections. */
struct page *memmap = NULL;
bool empty;
if (clear_subsection_map(pfn, nr_pages))
return;
empty = is_subsection_map_empty(ms);
if (empty) {
/*
 * Mark the section invalid so that valid_section()
 * returns false. This prevents code from dereferencing
 * the ms->usage array.
 */
ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
/*
 * When removing an early section, the usage map is kept (as the
 * usage maps of other sections fall into the same page). It
 * will be re-used when re-adding the section - which is then no
 * longer an early section. If the usage map is PageReserved, it
 * was allocated during boot.
 */
if (!PageReserved(virt_to_page(ms->usage))) {
kfree_rcu(ms->usage, rcu);
WRITE_ONCE(ms->usage, NULL);
}
memmap = pfn_to_page(SECTION_ALIGN_DOWN(pfn));
}
/*
 * The memmap of early sections is always fully populated. See
 * section_activate() and pfn_valid() .
 *
 * Non-early sections give back just the deactivated range. Early
 * sections give back the whole section's memmap, and only once the
 * subsection map is empty (memmap != NULL).
 */
if (!section_is_early) {
memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
depopulate_section_memmap(pfn, nr_pages, altmap);
} else if (memmap) {
memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
PAGE_SIZE)));
free_map_bootmem(memmap);
}
/* Reset the remaining section_mem_map state only after the memmap is released. */
if (empty)
ms->section_mem_map = (unsigned long)NULL;
}
/*
 * section_activate - activate a pfn range within one memory section
 * @nid: node id passed to populate_section_memmap()
 * @pfn: first pfn of the range
 * @nr_pages: number of pages in the range
 * @altmap: passed through to populate_section_memmap() and, on failure, to
 *          section_deactivate()
 * @pgmap: passed through to populate_section_memmap()
 *
 * Allocates a usage map if the section has none, sets the range's bits in the
 * subsection map and populates the memmap. A partial range inside an early
 * section reuses the existing memmap instead of populating one.
 *
 * Returns the struct page pointer to use for the range, or an ERR_PTR():
 * -ENOMEM if the usage map or the memmap cannot be allocated, otherwise the
 * error returned by fill_subsection_map().
 */
static struct page * __meminit section_activate(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap)
{
struct mem_section *ms = __pfn_to_section(pfn);
/* Non-NULL only if the usage map was allocated by this call. */
struct mem_section_usage *usage = NULL;
struct page *memmap;
int rc;
if (!ms->usage) {
usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
if (!usage)
return ERR_PTR(-ENOMEM);
ms->usage = usage;
}
rc = fill_subsection_map(pfn, nr_pages);
if (rc) {
/* Only undo a usage map installed above; a pre-existing one is kept. */
if (usage)
ms->usage = NULL;
kfree(usage);
return ERR_PTR(rc);
}
/*
 * The early init code does not consider partially populated
 * initial sections, it simply assumes that memory will never be
 * referenced. If we hot-add memory into such a section then we
 * do not need to populate the memmap and can simply reuse what
 * is already there.
 */
if (nr_pages < PAGES_PER_SECTION && early_section(ms))
return pfn_to_page(pfn);
memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
if (!memmap) {
/* Roll back the subsection map update made above. */
section_deactivate(pfn, nr_pages, altmap);
return ERR_PTR(-ENOMEM);
}
memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
return memmap;
}
/**
 * sparse_add_section - add a memory section, or populate an existing one
 * @nid: The node to add section on
 * @start_pfn: start pfn of the memory range
 * @nr_pages: number of pfns to add in the section
 * @altmap: alternate pfns to allocate the memmap backing store
 * @pgmap: alternate compound page geometry for devmap mappings
 *
 * This is only intended for hotplug.
 *
 * Note that only VMEMMAP supports sub-section aligned hotplug,
 * the proper alignment and size are gated by check_pfn_span().
 *
 * Return:
 * * 0		- On success.
 * * -EEXIST	- Section has been present.
 * * -EINVAL	- The range yields no subsection bits, as reported by
 *		  fill_subsection_map().
 * * -ENOMEM	- Out of memory.
 *
 * A negative value returned by sparse_index_init() is passed on unchanged.
 */
int __meminit sparse_add_section(int nid, unsigned long start_pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	const unsigned long sec_nr = pfn_to_section_nr(start_pfn);
	unsigned long sec_start_pfn;
	struct mem_section *section;
	struct page *map;
	int err;

	err = sparse_index_init(sec_nr, nid);
	if (err < 0)
		return err;

	map = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/*
	 * Poison the not-yet-initialized struct pages so that invalid flag
	 * combinations are caught.
	 */
	page_init_poison(map, sizeof(struct page) * nr_pages);

	section = __nr_to_section(sec_nr);
	__section_mark_present(section, sec_nr);

	/* In the subsection case, use the memmap of the section's first pfn. */
	sec_start_pfn = section_nr_to_pfn(sec_nr);
	if (sec_start_pfn != start_pfn)
		map = pfn_to_page(sec_start_pfn);

	sparse_init_one_section(section, sec_nr, map, section->usage, 0);
	return 0;
}
/*
 * Deactivate [@pfn, @pfn + @nr_pages) in the section containing @pfn.
 * Warns once and does nothing if that section is not valid.
 */
void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct mem_section *ms = __pfn_to_section(pfn);

	if (!WARN_ON_ONCE(!valid_section(ms)))
		section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */