Files
linux/kernel/vmcore_info.c
Linus Torvalds 440d6635b2 Merge tag 'mm-nonmm-stable-2026-04-15-04-20' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:

 - "pid: make sub-init creation retryable" (Oleg Nesterov)

   Make creation of init in a new namespace more robust by clearing away
   some historical cruft which is no longer needed. Also some
   documentation fixups

 - "selftests/fchmodat2: Error handling and general" (Mark Brown)

   Fix and a cleanup for the fchmodat2() syscall selftest

 - "lib: polynomial: Move to math/ and clean up" (Andy Shevchenko)

 - "hung_task: Provide runtime reset interface for hung task detector"
   (Aaron Tomlin)

   Give administrators the ability to zero out
   /proc/sys/kernel/hung_task_detect_count

 - "tools/getdelays: use the static UAPI headers from
   tools/include/uapi" (Thomas Weißschuh)

   Teach getdelays to use the in-kernel UAPI headers rather than the
   system-provided ones

 - "watchdog/hardlockup: Improvements to hardlockup" (Mayank Rungta)

   Several cleanups and fixups to the hardlockup detector code and its
   documentation

 - "lib/bch: fix undefined behavior from signed left-shifts" (Josh Law)

   A couple of small/theoretical fixes in the bch code

 - "ocfs2/dlm: fix two bugs in dlm_match_regions()" (Junrui Luo)

 - "cleanup the RAID5 XOR library" (Christoph Hellwig)

   A quite far-reaching cleanup to this code. I can't do better than to
   quote Christoph:

     "The XOR library used for the RAID5 parity is a bit of a mess right
      now. The main file sits in crypto/ despite not being cryptography
      and not using the crypto API, with the generic implementations
      sitting in include/asm-generic and the arch implementations
      sitting in an asm/ header in theory. The latter doesn't work for
      many cases, so architectures often build the code directly into
      the core kernel, or create another module for the architecture
      code.

      Change this to a single module in lib/ that also contains the
      architecture optimizations, similar to the library work Eric
      Biggers has done for the CRC and crypto libraries later. After
      that it changes to better calling conventions that allow for
      smarter architecture implementations (although none is contained
      here yet), and uses static_call to avoid indirection function call
      overhead"

 - "lib/list_sort: Clean up list_sort() scheduling workarounds"
   (Kuan-Wei Chiu)

   Clean up this library code by removing a hacky thing which was added
   for UBIFS, which UBIFS doesn't actually need

 - "Fix bugs in extract_iter_to_sg()" (Christian Ehrhardt)

   Fix a few bugs in the scatterlist code, add in-kernel tests for the
   now-fixed bugs and fix a leak in the test itself

 - "kdump: Enable LUKS-encrypted dump target support in ARM64 and
   PowerPC" (Coiby Xu)

   Enable support of the LUKS-encrypted device dump target on arm64 and
   powerpc

 - "ocfs2: consolidate extent list validation into block read callbacks"
   (Joseph Qi)

   Cleanup, simplify, and make more robust ocfs2's validation of extent
   list fields (Kernel test robot loves mounting corrupted fs images!)

* tag 'mm-nonmm-stable-2026-04-15-04-20' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (127 commits)
  ocfs2: validate group add input before caching
  ocfs2: validate bg_bits during freefrag scan
  ocfs2: fix listxattr handling when the buffer is full
  doc: watchdog: fix typos etc
  update Sean's email address
  ocfs2: use get_random_u32() where appropriate
  ocfs2: split transactions in dio completion to avoid credit exhaustion
  ocfs2: remove redundant l_next_free_rec check in __ocfs2_find_path()
  ocfs2: validate extent block list fields during block read
  ocfs2: remove empty extent list check in ocfs2_dx_dir_lookup_rec()
  ocfs2: validate dx_root extent list fields during block read
  ocfs2: fix use-after-free in ocfs2_fault() when VM_FAULT_RETRY
  ocfs2: handle invalid dinode in ocfs2_group_extend
  .get_maintainer.ignore: add Askar
  ocfs2: validate bg_list extent bounds in discontig groups
  checkpatch: exclude forward declarations of const structs
  tools/accounting: handle truncated taskstats netlink messages
  taskstats: set version in TGID exit notifications
  ocfs2/heartbeat: fix slot mapping rollback leaks on error paths
  arm64,ppc64le/kdump: pass dm-crypt keys to kdump kernel
  ...
2026-04-16 20:11:56 -07:00

254 lines
6.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* crash.c - kernel crash support code.
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
*/
#include <linux/buildid.h>
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/cpuhotplug.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>
#include <asm/page.h>
#include <asm/sections.h>
#include "kallsyms_internal.h"
#include "kexec_internal.h"
/* vmcoreinfo stuff */
unsigned char *vmcoreinfo_data;
size_t vmcoreinfo_size;
u32 *vmcoreinfo_note;
/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
static unsigned char *vmcoreinfo_data_safecopy;
struct hwerr_info {
atomic_t count;
time64_t timestamp;
};
/*
* The hwerr_data[] array is declared with global scope so that it remains
* accessible to vmcoreinfo even when Link Time Optimization (LTO) is enabled.
*/
struct hwerr_info hwerr_data[HWERR_RECOV_MAX];
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len)
{
struct elf_note *note = (struct elf_note *)buf;
note->n_namesz = strlen(name) + 1;
note->n_descsz = data_len;
note->n_type = type;
buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
memcpy(buf, name, note->n_namesz);
buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
memcpy(buf, data, data_len);
buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
return buf;
}
void final_note(Elf_Word *buf)
{
memset(buf, 0, sizeof(struct elf_note));
}
static void update_vmcoreinfo_note(void)
{
u32 *buf = vmcoreinfo_note;
if (!vmcoreinfo_size)
return;
buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
vmcoreinfo_size);
final_note(buf);
}
void crash_update_vmcoreinfo_safecopy(void *ptr)
{
if (ptr)
memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
vmcoreinfo_data_safecopy = ptr;
}
void crash_save_vmcoreinfo(void)
{
if (!vmcoreinfo_note)
return;
/* Use the safe copy to generate vmcoreinfo note if have */
if (vmcoreinfo_data_safecopy)
vmcoreinfo_data = vmcoreinfo_data_safecopy;
vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
update_vmcoreinfo_note();
}
void vmcoreinfo_append_str(const char *fmt, ...)
{
va_list args;
char buf[0x50];
size_t r;
va_start(args, fmt);
r = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
vmcoreinfo_size += r;
WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
"vmcoreinfo data exceeds allocated size, truncating");
}
/*
* provide an empty default implementation here -- architecture
* code may override this
*/
void __weak arch_crash_save_vmcoreinfo(void)
{}
phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
return __pa(vmcoreinfo_note);
}
EXPORT_SYMBOL(paddr_vmcoreinfo_note);
void hwerr_log_error_type(enum hwerr_error_type src)
{
if (src < 0 || src >= HWERR_RECOV_MAX)
return;
atomic_inc(&hwerr_data[src].count);
WRITE_ONCE(hwerr_data[src].timestamp, ktime_get_real_seconds());
}
EXPORT_SYMBOL_GPL(hwerr_log_error_type);
static int __init crash_save_vmcoreinfo_init(void)
{
int order;
order = get_order(VMCOREINFO_BYTES);
vmcoreinfo_data = (unsigned char *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!vmcoreinfo_data) {
pr_warn("Memory allocation for vmcoreinfo_data failed\n");
return -ENOMEM;
}
vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
GFP_KERNEL | __GFP_ZERO);
if (!vmcoreinfo_note) {
free_pages((unsigned long)vmcoreinfo_data, order);
vmcoreinfo_data = NULL;
pr_warn("Memory allocation for vmcoreinfo_note failed\n");
return -ENOMEM;
}
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
VMCOREINFO_BUILD_ID();
VMCOREINFO_PAGESIZE(PAGE_SIZE);
VMCOREINFO_SYMBOL(init_uts_ns);
VMCOREINFO_OFFSET(uts_namespace, name);
VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
VMCOREINFO_SYMBOL(_stext);
vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", (unsigned long) VMALLOC_START);
#ifndef CONFIG_NUMA
VMCOREINFO_SYMBOL(mem_map);
VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
VMCOREINFO_SYMBOL_ARRAY(vmemmap);
#endif
#ifdef CONFIG_SPARSEMEM
VMCOREINFO_SYMBOL_ARRAY(mem_section);
VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
VMCOREINFO_STRUCT_SIZE(mem_section);
VMCOREINFO_OFFSET(mem_section, section_mem_map);
VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
VMCOREINFO_STRUCT_SIZE(page);
VMCOREINFO_STRUCT_SIZE(pglist_data);
VMCOREINFO_STRUCT_SIZE(zone);
VMCOREINFO_STRUCT_SIZE(free_area);
VMCOREINFO_STRUCT_SIZE(list_head);
VMCOREINFO_SIZE(nodemask_t);
VMCOREINFO_OFFSET(page, flags);
VMCOREINFO_OFFSET(page, _refcount);
VMCOREINFO_OFFSET(page, mapping);
VMCOREINFO_OFFSET(page, lru);
VMCOREINFO_OFFSET(page, _mapcount);
VMCOREINFO_OFFSET(page, private);
VMCOREINFO_OFFSET(page, compound_info);
VMCOREINFO_OFFSET(pglist_data, node_zones);
VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLATMEM
VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
VMCOREINFO_OFFSET(pglist_data, node_id);
VMCOREINFO_OFFSET(zone, free_area);
VMCOREINFO_OFFSET(zone, vm_stat);
VMCOREINFO_OFFSET(zone, spanned_pages);
VMCOREINFO_OFFSET(free_area, free_list);
VMCOREINFO_OFFSET(list_head, next);
VMCOREINFO_OFFSET(list_head, prev);
VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS);
log_buf_vmcoreinfo_setup();
VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
VMCOREINFO_NUMBER(NR_FREE_PAGES);
VMCOREINFO_NUMBER(PG_lru);
VMCOREINFO_NUMBER(PG_private);
VMCOREINFO_NUMBER(PG_swapcache);
VMCOREINFO_NUMBER(PG_swapbacked);
#define PAGE_SLAB_MAPCOUNT_VALUE (PGTY_slab << 24)
VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE);
#ifdef CONFIG_MEMORY_FAILURE
VMCOREINFO_NUMBER(PG_hwpoison);
#endif
VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (PGTY_buddy << 24)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#define PAGE_HUGETLB_MAPCOUNT_VALUE (PGTY_hugetlb << 24)
VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (PGTY_offline << 24)
VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
#ifdef CONFIG_UNACCEPTED_MEMORY
#define PAGE_UNACCEPTED_MAPCOUNT_VALUE (PGTY_unaccepted << 24)
VMCOREINFO_NUMBER(PAGE_UNACCEPTED_MAPCOUNT_VALUE);
#endif
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
VMCOREINFO_SYMBOL(kallsyms_num_syms);
VMCOREINFO_SYMBOL(kallsyms_token_table);
VMCOREINFO_SYMBOL(kallsyms_token_index);
VMCOREINFO_SYMBOL(kallsyms_offsets);
#endif /* CONFIG_KALLSYMS */
arch_crash_save_vmcoreinfo();
update_vmcoreinfo_note();
return 0;
}
subsys_initcall(crash_save_vmcoreinfo_init);