mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
In order to be able to do this, we need to change VM_DATA_DEFAULT_FLAGS and friends and update the architecture-specific definitions also. We then have to update some KSM logic to handle VMA flags, and introduce VMA_STACK_FLAGS to define the vma_flags_t equivalent of VM_STACK_FLAGS. We also introduce two helper functions for use during the time we are converting legacy flags to vma_flags_t values - vma_flags_to_legacy() and legacy_to_vma_flags(). This enables us to iteratively make changes to break these changes up into separate parts. We use these explicitly here to keep VM_STACK_FLAGS around for certain users which need to maintain the legacy vm_flags_t values for the time being. We are no longer able to rely on the simple VM_xxx being set to zero if the feature is not enabled, so in the case of VM_DROPPABLE we introduce VMA_DROPPABLE as the vma_flags_t equivalent, which is set to EMPTY_VMA_FLAGS if the droppable flag is not available. While we're here, we make the description of do_brk_flags() into a kdoc comment, as it almost was already. We use vma_flags_to_legacy() to not need to update the vm_get_page_prot() logic at this time. Note that in create_init_stack_vma() we have to replace the BUILD_BUG_ON() with a VM_WARN_ON_ONCE() as the tested values are no longer build time available. We also update mprotect_fixup() to use VMA flags where possible, though we have to live with a little duplication between vm_flags_t and vma_flags_t values for the time being until further conversions are made. While we're here, update VM_SPECIAL to be defined in terms of VMA_SPECIAL_FLAGS now we have vma_flags_to_legacy(). Finally, we update the VMA tests to reflect these changes.
Link: https://lkml.kernel.org/r/d02e3e45d9a33d7904b149f5604904089fd640ae.1774034900.git.ljs@kernel.org Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org> Acked-by: Paul Moore <paul@paul-moore.com> [SELinux] Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Alexandre Ghiti <alex@ghiti.fr> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com> Cc: "Borislav Petkov (AMD)" <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chengming Zhou <chengming.zhou@linux.dev> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@kernel.org> Cc: Dinh Nguyen <dinguyen@kernel.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Jann Horn <jannh@google.com> Cc: Johannes Berg <johannes@sipsolutions.net> Cc: Kees Cook <kees@kernel.org> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Ondrej Mosnacek <omosnace@redhat.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Pedro Falcato <pfalcato@suse.de> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@armlinux.org.uk> Cc: Stephen Smalley <stephen.smalley.work@gmail.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vineet Gupta <vgupta@kernel.org> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Cc: xu xin <xu.xin16@zte.com.cn> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
166 lines
4.5 KiB
C
166 lines
4.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
/*
|
|
* Functions explicitly implemented for exec functionality which however are
|
|
* explicitly VMA-only logic.
|
|
*/
|
|
|
|
#include "vma_internal.h"
|
|
#include "vma.h"
|
|
|
|
/*
|
|
* Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
|
|
* this VMA and its relocated range, which will now reside at [vma->vm_start -
|
|
* shift, vma->vm_end - shift).
|
|
*
|
|
* This function is almost certainly NOT what you want for anything other than
|
|
* early executable temporary stack relocation.
|
|
*/
|
|
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
|
|
{
|
|
/*
|
|
* The process proceeds as follows:
|
|
*
|
|
* 1) Use shift to calculate the new vma endpoints.
|
|
* 2) Extend vma to cover both the old and new ranges. This ensures the
|
|
* arguments passed to subsequent functions are consistent.
|
|
* 3) Move vma's page tables to the new range.
|
|
* 4) Free up any cleared pgd range.
|
|
* 5) Shrink the vma to cover only the new range.
|
|
*/
|
|
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
unsigned long old_start = vma->vm_start;
|
|
unsigned long old_end = vma->vm_end;
|
|
unsigned long length = old_end - old_start;
|
|
unsigned long new_start = old_start - shift;
|
|
unsigned long new_end = old_end - shift;
|
|
VMA_ITERATOR(vmi, mm, new_start);
|
|
VMG_STATE(vmg, mm, &vmi, new_start, old_end, EMPTY_VMA_FLAGS,
|
|
vma->vm_pgoff);
|
|
struct vm_area_struct *next;
|
|
struct mmu_gather tlb;
|
|
PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);
|
|
|
|
BUG_ON(new_start > new_end);
|
|
|
|
/*
|
|
* ensure there are no vmas between where we want to go
|
|
* and where we are
|
|
*/
|
|
if (vma != vma_next(&vmi))
|
|
return -EFAULT;
|
|
|
|
vma_iter_prev_range(&vmi);
|
|
/*
|
|
* cover the whole range: [new_start, old_end)
|
|
*/
|
|
vmg.target = vma;
|
|
if (vma_expand(&vmg))
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
* move the page tables downwards, on failure we rely on
|
|
* process cleanup to remove whatever mess we made.
|
|
*/
|
|
pmc.for_stack = true;
|
|
if (length != move_page_tables(&pmc))
|
|
return -ENOMEM;
|
|
|
|
tlb_gather_mmu(&tlb, mm);
|
|
next = vma_next(&vmi);
|
|
if (new_end > old_start) {
|
|
/*
|
|
* when the old and new regions overlap clear from new_end.
|
|
*/
|
|
free_pgd_range(&tlb, new_end, old_end, new_end,
|
|
next ? next->vm_start : USER_PGTABLES_CEILING);
|
|
} else {
|
|
/*
|
|
* otherwise, clean from old_start; this is done to not touch
|
|
* the address space in [new_end, old_start) some architectures
|
|
* have constraints on va-space that make this illegal (IA64) -
|
|
* for the others its just a little faster.
|
|
*/
|
|
free_pgd_range(&tlb, old_start, old_end, new_end,
|
|
next ? next->vm_start : USER_PGTABLES_CEILING);
|
|
}
|
|
tlb_finish_mmu(&tlb);
|
|
|
|
vma_prev(&vmi);
|
|
/* Shrink the vma to just the new range */
|
|
return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
|
|
}
|
|
|
|
/*
|
|
* Establish the stack VMA in an execve'd process, located temporarily at the
|
|
* maximum stack address provided by the architecture.
|
|
*
|
|
* We later relocate this downwards in relocate_vma_down().
|
|
*
|
|
* This function is almost certainly NOT what you want for anything other than
|
|
* early executable initialisation.
|
|
*
|
|
* On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the
|
|
* maximum addressable location in the stack (that is capable of storing a
|
|
* system word of data).
|
|
*/
|
|
int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
|
|
unsigned long *top_mem_p)
|
|
{
|
|
unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
|
|
int err;
|
|
struct vm_area_struct *vma = vm_area_alloc(mm);
|
|
|
|
if (!vma)
|
|
return -ENOMEM;
|
|
|
|
vma_set_anonymous(vma);
|
|
|
|
if (mmap_write_lock_killable(mm)) {
|
|
err = -EINTR;
|
|
goto err_free;
|
|
}
|
|
|
|
/*
|
|
* Need to be called with mmap write lock
|
|
* held, to avoid race with ksmd.
|
|
*/
|
|
err = ksm_execve(mm);
|
|
if (err)
|
|
goto err_ksm;
|
|
|
|
/*
|
|
* Place the stack at the largest stack address the architecture
|
|
* supports. Later, we'll move this to an appropriate place. We don't
|
|
* use STACK_TOP because that can depend on attributes which aren't
|
|
* configured yet.
|
|
*/
|
|
VM_WARN_ON_ONCE(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
|
|
vma->vm_end = STACK_TOP_MAX;
|
|
vma->vm_start = vma->vm_end - PAGE_SIZE;
|
|
if (pgtable_supports_soft_dirty())
|
|
flags |= VM_SOFTDIRTY;
|
|
vm_flags_init(vma, flags);
|
|
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
|
|
|
|
err = insert_vm_struct(mm, vma);
|
|
if (err)
|
|
goto err;
|
|
|
|
mm->stack_vm = mm->total_vm = 1;
|
|
mmap_write_unlock(mm);
|
|
*vmap = vma;
|
|
*top_mem_p = vma->vm_end - sizeof(void *);
|
|
return 0;
|
|
|
|
err:
|
|
ksm_exit(mm);
|
|
err_ksm:
|
|
mmap_write_unlock(mm);
|
|
err_free:
|
|
*vmap = NULL;
|
|
vm_area_free(vma);
|
|
return err;
|
|
}
|