Files
linux/arch/x86/lib/memset_64.S
Peter Zijlstra 2981557cb0 x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
The expectation is that all EXPORT'ed symbols are free to have their
address taken and called indirectly. The majority of the assembly
defined functions currently violate this expectation.

Make then all use SYM_TYPED_FUNC_START() in order to emit the proper
kCFI preamble.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Link: https://lore.kernel.org/r/20250207122546.302679189@infradead.org
2025-02-14 10:32:05 +01:00

119 lines
2.4 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
.section .noinstr.text, "ax"
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the original function as well.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*
* The FSRS alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep stosb' itself is small enough to replace the call, but all
* the register moves blow up the code. And two of them are "needed"
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
SYM_TYPED_FUNC_START(__memset)
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
EXPORT_SYMBOL(memset)
SYM_FUNC_START_LOCAL(memset_orig)
movq %rdi,%r10
/* expand byte value */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
imulq %rcx,%rax
/* align dst */
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
.Lafter_bad_alignment:
movq %rdx,%rcx
shrq $6,%rcx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
decq %rcx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
movq %rax,24(%rdi)
movq %rax,32(%rdi)
movq %rax,40(%rdi)
movq %rax,48(%rdi)
movq %rax,56(%rdi)
leaq 64(%rdi),%rdi
jnz .Lloop_64
/* Handle tail in loops. The loops should be faster than hard
to predict jump tables. */
.p2align 4
.Lhandle_tail:
movl %edx,%ecx
andl $63&(~7),%ecx
jz .Lhandle_7
shrl $3,%ecx
.p2align 4
.Lloop_8:
decl %ecx
movq %rax,(%rdi)
leaq 8(%rdi),%rdi
jnz .Lloop_8
.Lhandle_7:
andl $7,%edx
jz .Lende
.p2align 4
.Lloop_1:
decl %edx
movb %al,(%rdi)
leaq 1(%rdi),%rdi
jnz .Lloop_1
.Lende:
movq %r10,%rax
RET
.Lbad_alignment:
cmpq $7,%rdx
jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
SYM_FUNC_END(memset_orig)