Merge tag 'loongarch-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson

Pull LoongArch updates from Huacai Chen:
 - Select HAVE_CMPXCHG_{LOCAL,DOUBLE}
 - Add 128-bit atomic cmpxchg support
 - Add HOTPLUG_SMT implementation
 - Wire up memfd_secret system call
 - Fix boot errors and unwind errors for KASAN
 - Use BPF prog pack allocator and add BPF arena support
 - Update dts files to add NAND controllers
 - Some bug fixes and other small changes

* tag 'loongarch-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson:
  LoongArch: dts: loongson-2k1000: Add nand controller support
  LoongArch: dts: loongson-2k0500: Add nand controller support
  LoongArch: BPF: Implement bpf_addr_space_cast instruction
  LoongArch: BPF: Implement PROBE_MEM32 pseudo instructions
  LoongArch: BPF: Use BPF prog pack allocator
  LoongArch: Use IS_ERR_PCPU() macro for KGDB
  LoongArch: Rework KASAN initialization for PTW-enabled systems
  LoongArch: Disable instrumentation for setup_ptwalker()
  LoongArch: Remove some extern variables in source files
  LoongArch: Guard percpu handler under !CONFIG_PREEMPT_RT
  LoongArch: Handle percpu handler address for ORC unwinder
  LoongArch: Use %px to print unmodified unwinding address
  LoongArch: Prefer top-down allocation after arch_mem_init()
  LoongArch: Add HOTPLUG_SMT implementation
  LoongArch: Make cpumask_of_node() robust against NUMA_NO_NODE
  LoongArch: Wire up memfd_secret system call
  LoongArch: Replace seq_printf() with seq_puts() for simple strings
  LoongArch: Add 128-bit atomic cmpxchg support
  LoongArch: Add detection for SC.Q support
  LoongArch: Select HAVE_CMPXCHG_LOCAL in Kconfig
Merged by: Linus Torvalds
Date: 2026-02-14 12:47:15 -08:00
26 changed files with 450 additions and 153 deletions (arch/loongarch/net/bpf_jit.c shown below)

--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c

@@ -17,6 +17,7 @@
 #define LOONGARCH_BPF_FENTRY_NBYTES	(LOONGARCH_LONG_JUMP_NINSNS * 4)
 #define REG_TCC		LOONGARCH_GPR_A6
+#define REG_ARENA	LOONGARCH_GPR_S6	/* For storing arena_vm_start */
 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack)	(round_up(stack, 16) - 80)

 static const int regmap[] = {
@@ -136,6 +137,9 @@ static void build_prologue(struct jit_ctx *ctx)
 	/* To store tcc and tcc_ptr */
 	stack_adjust += sizeof(long) * 2;

+	if (ctx->arena_vm_start)
+		stack_adjust += 8;
+
 	stack_adjust = round_up(stack_adjust, 16);
 	stack_adjust += bpf_stack_adjust;
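
To make the rounding concrete, the sizing above works out as in the standalone sketch below; the 64-byte callee-saved area and the 24-byte BPF stack are assumed example values, not numbers taken from this commit:

#include <stdio.h>

/* Mirrors the stack_adjust computation in build_prologue() above.
 * The starting values are illustrative only. */
static long round_up16(long x) { return (x + 15) & ~15L; }

int main(void)
{
	long stack_adjust = 64;		/* assumed callee-saved spill area */
	long bpf_stack_adjust = 24;	/* assumed BPF program stack usage */
	int arena_in_use = 1;

	stack_adjust += sizeof(long) * 2;		/* tcc and tcc_ptr: 80 */
	if (arena_in_use)
		stack_adjust += 8;			/* REG_ARENA slot: 88 */
	stack_adjust = round_up16(stack_adjust);	/* 88 -> 96 */
	stack_adjust += bpf_stack_adjust;		/* 96 + 24 = 120 */

	printf("stack_adjust = %ld\n", stack_adjust);
	return 0;
}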
@@ -178,6 +182,11 @@ static void build_prologue(struct jit_ctx *ctx)
 	store_offset -= sizeof(long);
 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);

+	if (ctx->arena_vm_start) {
+		store_offset -= sizeof(long);
+		emit_insn(ctx, std, REG_ARENA, LOONGARCH_GPR_SP, store_offset);
+	}
+
 	prepare_bpf_tail_call_cnt(ctx, &store_offset);

 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
@@ -186,6 +195,9 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);

 	ctx->stack_size = stack_adjust;
+
+	if (ctx->arena_vm_start)
+		move_imm(ctx, REG_ARENA, ctx->arena_vm_start, false);
 }

 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
@@ -217,6 +229,11 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
 	load_offset -= sizeof(long);
 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);

+	if (ctx->arena_vm_start) {
+		load_offset -= sizeof(long);
+		emit_insn(ctx, ldd, REG_ARENA, LOONGARCH_GPR_SP, load_offset);
+	}
+
 	/*
 	 * When pushing onto the stack, follow the order of tcc then tcc_ptr.
 	 * When popping from the stack, first pop tcc_ptr, then tcc.
 	 */
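
Read together with the prologue hunks above, the matching spill/reload pairs imply a frame of roughly the following shape; this layout is inferred from the stores shown in this diff and is an orientation aid, not an authoritative map:

/*
 *   high addr:  caller's frame
 *               ra, fp, s0..s5      callee-saved spills
 *               REG_ARENA ($s6)     only when ctx->arena_vm_start != 0
 *               tcc, tcc_ptr        tail-call counter state
 *               bpf_stack_adjust    the BPF program's own stack
 *   low addr:   sp after the prologue's "addid sp, sp, -stack_adjust"
 */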
@@ -442,6 +459,7 @@ static bool is_signed_bpf_cond(u8 cond)
 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
+#define REG_DONT_CLEAR_MARKER	0

 bool ex_handler_bpf(const struct exception_table_entry *ex,
 		    struct pt_regs *regs)
@@ -449,7 +467,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);

-	regs->regs[dst_reg] = 0;
+	if (dst_reg != REG_DONT_CLEAR_MARKER)
+		regs->regs[dst_reg] = 0;
 	regs->csr_era = (unsigned long)&ex->fixup - offset;

 	return true;
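
The handler above unpacks both fields from the single 32-bit ex->fixup word: bits 31:27 hold the destination register (REG_DONT_CLEAR_MARKER, i.e. 0, means "nothing to clear", which is what the new store cases use) and bits 26:0 hold the offset back to the fixup address. A user-space sketch of that packing, with the GENMASK()/FIELD_GET() helpers expanded by hand:

#include <stdint.h>
#include <stdio.h>

/* Bits 31..27: destination register (5 bits, $r0..$r31);
 * bits 26..0: fixup offset. Mirrors BPF_FIXUP_*_MASK above. */
#define FIXUP_REG_SHIFT		27
#define FIXUP_OFFSET_MASK	((1u << 27) - 1)

static uint32_t pack_fixup(uint32_t reg, uint32_t offset)
{
	return (reg << FIXUP_REG_SHIFT) | (offset & FIXUP_OFFSET_MASK);
}

int main(void)
{
	uint32_t fixup = pack_fixup(12, 0x1234);	/* e.g. $r12, offset 0x1234 */

	printf("reg=%u offset=0x%x\n",
	       fixup >> FIXUP_REG_SHIFT, fixup & FIXUP_OFFSET_MASK);
	return 0;
}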
@@ -461,28 +480,33 @@ static int add_exception_handler(const struct bpf_insn *insn,
 				 struct jit_ctx *ctx,
 				 int dst_reg)
 {
 	unsigned long pc;
-	off_t offset;
+	off_t ins_offset, fixup_offset;
 	struct exception_table_entry *ex;

-	if (!ctx->image || !ctx->prog->aux->extable)
+	if (!ctx->image || !ctx->ro_image || !ctx->prog->aux->extable)
 		return 0;

 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
-	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
+	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+	    BPF_MODE(insn->code) != BPF_PROBE_MEM32)
 		return 0;

 	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
 		return -EINVAL;

 	ex = &ctx->prog->aux->extable[ctx->num_exentries];
-	pc = (unsigned long)&ctx->image[ctx->idx - 1];
+	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];

-	offset = pc - (long)&ex->insn;
-	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+	/*
+	 * This is the relative offset of the instruction that may fault from
+	 * the exception table itself. This will be written to the exception
+	 * table and if this instruction faults, the destination register will
+	 * be set to '0' and the execution will jump to the next instruction.
+	 */
+	ins_offset = pc - (long)&ex->insn;
+	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
 		return -ERANGE;
-	ex->insn = offset;

 	/*
 	 * Since the extable follows the program, the fixup offset is always
 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
@@ -490,13 +514,23 @@ static int add_exception_handler(const struct bpf_insn *insn,
 	 * bits. We don't need to worry about buildtime or runtime sort
 	 * modifying the upper bits because the table is already sorted, and
 	 * isn't part of the main exception table.
+	 *
+	 * The fixup_offset is set to the next instruction from the instruction
+	 * that may fault. The execution will jump to this after handling the
+	 * fault.
 	 */
-	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
-	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+	fixup_offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
+	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
 		return -ERANGE;

+	/*
+	 * The offsets above have been calculated using the RO buffer but we
+	 * need to use the RW buffer for writes. Switch ex to the RW buffer
+	 * for writing.
+	 */
+	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
+
+	ex->insn = ins_offset;
+
 	ex->type = EX_TYPE_BPF;
-	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
+		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

 	ctx->num_exentries++;
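
The ex rebasing added above is plain pointer arithmetic: the entry's byte offset inside the read-only image is reapplied to the writable image, so offsets computed against the RO addresses can be stored through a mapping that actually permits writes. A minimal user-space analogue (buffer names are illustrative):

#include <assert.h>
#include <string.h>

int main(void)
{
	char rw[64], ro[64];	/* stand-ins for ctx->image / ctx->ro_image */
	char *ex_ro = &ro[40];	/* entry located via the RO buffer */

	/* Same rebasing as: ex = ctx->image + ((void *)ex - ctx->ro_image) */
	char *ex_rw = rw + (ex_ro - ro);

	memset(ex_rw, 0, 4);	/* the write lands in the RW buffer */
	assert(ex_rw - rw == ex_ro - ro);
	return 0;
}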
@@ -514,8 +548,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	const u8 cond = BPF_OP(code);
 	const u8 t1 = LOONGARCH_GPR_T1;
 	const u8 t2 = LOONGARCH_GPR_T2;
-	const u8 src = regmap[insn->src_reg];
-	const u8 dst = regmap[insn->dst_reg];
+	const u8 t3 = LOONGARCH_GPR_T3;
+	u8 src = regmap[insn->src_reg];
+	u8 dst = regmap[insn->dst_reg];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
@@ -524,6 +559,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	/* dst = src */
 	case BPF_ALU | BPF_MOV | BPF_X:
 	case BPF_ALU64 | BPF_MOV | BPF_X:
+		if (insn_is_cast_user(insn)) {
+			move_reg(ctx, t1, src);
+			emit_zext_32(ctx, t1, true);
+			move_imm(ctx, dst, (ctx->user_vm_start >> 32) << 32, false);
+			emit_insn(ctx, beq, t1, LOONGARCH_GPR_ZERO, 1);
+			emit_insn(ctx, or, t1, dst, t1);
+			move_reg(ctx, dst, t1);
+			break;
+		}
 		switch (off) {
 		case 0:
 			move_reg(ctx, dst, src);
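
This is the arena-to-user bpf_addr_space_cast: keep the low 32 bits and, if they are non-zero, OR in the upper half of user_vm_start; a NULL arena pointer stays NULL because the beq skips the or while the final move still copies t1 (zero) into dst. The intended semantics in plain C (a sketch, not kernel code):

#include <stdint.h>

/* Semantics of the JITed cast_user sequence emitted above. */
static uint64_t cast_user(uint64_t src, uint64_t user_vm_start)
{
	uint64_t lo = (uint32_t)src;		/* emit_zext_32 */

	if (lo == 0)				/* beq t1, $zero, 1 */
		return 0;
	return ((user_vm_start >> 32) << 32) | lo;	/* or + move */
}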
@@ -1021,8 +1065,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
-		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
-			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
+	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + REG_ARENA + off) */
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+		sign_extend = BPF_MODE(code) == BPF_MEMSX ||
+			      BPF_MODE(code) == BPF_PROBE_MEMSX;
+		if (BPF_MODE(code) == BPF_PROBE_MEM32) {
+			emit_insn(ctx, addd, t2, src, REG_ARENA);
+			src = t2;
+		}
 		switch (BPF_SIZE(code)) {
 		case BPF_B:
 			if (is_signed_imm12(off)) {
@@ -1082,6 +1137,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	case BPF_ST | BPF_MEM | BPF_H:
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_DW:
+	/* ST | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = imm */
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+		if (BPF_MODE(code) == BPF_PROBE_MEM32) {
+			emit_insn(ctx, addd, t3, dst, REG_ARENA);
+			dst = t3;
+		}
 		switch (BPF_SIZE(code)) {
 		case BPF_B:
 			move_imm(ctx, t1, imm, is32);
@@ -1124,6 +1189,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 			}
 			break;
 		}
+
+		ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER);
+		if (ret)
+			return ret;
 		break;

 	/* *(size *)(dst + off) = src */
@@ -1131,6 +1200,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	case BPF_STX | BPF_MEM | BPF_H:
 	case BPF_STX | BPF_MEM | BPF_W:
 	case BPF_STX | BPF_MEM | BPF_DW:
+	/* STX | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = src */
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+		if (BPF_MODE(code) == BPF_PROBE_MEM32) {
+			emit_insn(ctx, addd, t2, dst, REG_ARENA);
+			dst = t2;
+		}
 		switch (BPF_SIZE(code)) {
 		case BPF_B:
 			if (is_signed_imm12(off)) {
@@ -1169,6 +1248,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 			}
 			break;
 		}
+
+		ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER);
+		if (ret)
+			return ret;
 		break;

 	case BPF_STX | BPF_ATOMIC | BPF_W:
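
All three PROBE_MEM32 flavours in this file (LDX, ST, STX) share one lowering: add the arena pointer to REG_ARENA (the kernel-side arena base cached in $s6) to form the effective kernel address, emit the ordinary sized access, then register an extable entry; the store cases pass REG_DONT_CLEAR_MARKER because a faulting store has no destination register to zero. Roughly, in C (names illustrative):

#include <stdint.h>

/* What the emitted addd + sized load amounts to at run time for an
 * arena access *(u32 *)(ptr + off). This may fault, which is exactly
 * what the extable entry registered above is for. */
static inline uint32_t arena_load32(uint64_t arena_vm_start,
				    uint64_t ptr, int16_t off)
{
	uint64_t kaddr = arena_vm_start + ptr;		/* addd tX, reg, REG_ARENA */

	return *(volatile uint32_t *)(kaddr + off);	/* ld.w tX, tX, off */
}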
@@ -1829,11 +1912,12 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	bool tmp_blinded = false, extra_pass = false;
-	u8 *image_ptr;
+	u8 *image_ptr, *ro_image_ptr;
 	int image_size, prog_size, extable_size;
 	struct jit_ctx ctx;
 	struct jit_data *jit_data;
 	struct bpf_binary_header *header;
+	struct bpf_binary_header *ro_header;
 	struct bpf_prog *tmp, *orig_prog = prog;

 	/*
@@ -1868,8 +1952,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	}

 	if (jit_data->ctx.offset) {
 		ctx = jit_data->ctx;
-		image_ptr = jit_data->image;
+		ro_header = jit_data->ro_header;
+		ro_image_ptr = (void *)ctx.ro_image;
 		header = jit_data->header;
+		image_ptr = (void *)header + ((void *)ro_image_ptr - (void *)ro_header);
 		extra_pass = true;
 		prog_size = sizeof(u32) * ctx.idx;
 		goto skip_init_ctx;
@@ -1877,6 +1963,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	memset(&ctx, 0, sizeof(ctx));

 	ctx.prog = prog;
+	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
+	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);

 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
 	if (ctx.offset == NULL) {
@@ -1903,17 +1991,25 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	prog_size = sizeof(u32) * ctx.idx;
 	image_size = prog_size + extable_size;

 	/* Now we know the size of the structure to make */
-	header = bpf_jit_binary_alloc(image_size, &image_ptr,
-				      sizeof(u32), jit_fill_hole);
-	if (header == NULL) {
+	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32),
+					      &header, &image_ptr, jit_fill_hole);
+	if (!ro_header) {
 		prog = orig_prog;
 		goto out_offset;
 	}

 	/* 2. Now, the actual pass to generate final JIT code */
+	/*
+	 * Use the image (RW) for writing the JITed instructions. But also save
+	 * the ro_image (RX) for calculating the offsets in the image. The RW
+	 * image will be later copied to the RX image from where the program
+	 * will run. The bpf_jit_binary_pack_finalize() will do this copy in
+	 * the final step.
+	 */
 	ctx.image = (union loongarch_instruction *)image_ptr;
+	ctx.ro_image = (union loongarch_instruction *)ro_image_ptr;

 	if (extable_size)
-		prog->aux->extable = (void *)image_ptr + prog_size;
+		prog->aux->extable = (void *)ro_image_ptr + prog_size;

skip_init_ctx:
 	ctx.idx = 0;
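
The pack allocator hands back two views of the same program: a writable scratch pair (header/image) and the final read-only-executable pair (ro_header/ro_image). Instructions are written through the RW side, while every address that matters at run time (extable offsets, prog->bpf_func) is computed against the RO side, because that is where the CPU will fetch from; bpf_jit_binary_pack_finalize() performs the RW-to-ROX copy. A user-space mock of that lifecycle (names and the fake instruction words are illustrative):

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	uint32_t rw_image[4] = { 0x002a0004, 0x4c000020, 0, 0 }; /* fake insns */
	size_t size = sizeof(rw_image);

	/* "ro_image": the mapping that will end up read-only + executable */
	void *ro_image = mmap(NULL, size, PROT_READ | PROT_WRITE,
			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(ro_image != MAP_FAILED);

	memcpy(ro_image, rw_image, size);	/* "finalize": copy RW -> RO */
	assert(mprotect(ro_image, size, PROT_READ | PROT_EXEC) == 0);

	/* Everything PC-relative must use ro_image, the address that runs. */
	uintptr_t bpf_func = (uintptr_t)ro_image;
	assert(bpf_func % 4 == 0);
	return 0;
}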
@@ -1921,48 +2017,47 @@ skip_init_ctx:
 	build_prologue(&ctx);
 	if (build_body(&ctx, extra_pass)) {
-		bpf_jit_binary_free(header);
 		prog = orig_prog;
-		goto out_offset;
+		goto out_free;
 	}
 	build_epilogue(&ctx);

 	/* 3. Extra pass to validate JITed code */
 	if (validate_ctx(&ctx)) {
-		bpf_jit_binary_free(header);
 		prog = orig_prog;
-		goto out_offset;
+		goto out_free;
 	}

 	/* And we're done */
 	if (bpf_jit_enable > 1)
 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

-	/* Update the icache */
-	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));

 	if (!prog->is_func || extra_pass) {
-		int err;

 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
 			pr_err_once("multi-func JIT bug %d != %d\n",
 				    ctx.idx, jit_data->ctx.idx);
 			goto out_free;
 		}
-		err = bpf_jit_binary_lock_ro(header);
-		if (err) {
-			pr_err_once("bpf_jit_binary_lock_ro() returned %d\n",
-				    err);
+		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
+			/* ro_header has been freed */
+			ro_header = NULL;
+			prog = orig_prog;
 			goto out_free;
 		}
+		/*
+		 * The instructions have now been copied to the ROX region from
+		 * where they will execute. Now the data cache has to be cleaned
+		 * to the PoU and the I-cache has to be invalidated for the VAs.
+		 */
+		bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
 	} else {
 		jit_data->ctx = ctx;
-		jit_data->image = image_ptr;
 		jit_data->header = header;
+		jit_data->ro_header = ro_header;
 	}

 	prog->jited = 1;
 	prog->jited_len = prog_size;
-	prog->bpf_func = (void *)ctx.image;
+	prog->bpf_func = (void *)ctx.ro_image;

 	if (!prog->is_func || extra_pass) {
 		int i;
@@ -1982,17 +2077,39 @@ out:
 	if (tmp_blinded)
 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);

 	return prog;

out_free:
-	bpf_jit_binary_free(header);
 	prog->bpf_func = NULL;
 	prog->jited = 0;
 	prog->jited_len = 0;
+	if (header) {
+		bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size));
+		bpf_jit_binary_pack_free(ro_header, header);
+	}
 	goto out_offset;
 }

+void bpf_jit_free(struct bpf_prog *prog)
+{
+	if (prog->jited) {
+		struct jit_data *jit_data = prog->aux->jit_data;
+		struct bpf_binary_header *hdr;
+
+		/*
+		 * If we fail the final pass of JIT (from jit_subprogs), the
+		 * program may not be finalized yet. Call finalize here before
+		 * freeing it.
+		 */
+		if (jit_data) {
+			bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
+			kfree(jit_data);
+		}
+		hdr = bpf_jit_binary_pack_hdr(prog);
+		bpf_jit_binary_pack_free(hdr, NULL);
+		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
+	}
+
+	bpf_prog_unlock_free(prog);
+}
+
 bool bpf_jit_bypass_spec_v1(void)
 {
 	return true;
@@ -2003,6 +2120,11 @@ bool bpf_jit_bypass_spec_v4(void)
 	return true;
 }

+bool bpf_jit_supports_arena(void)
+{
+	return true;
+}
+
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool bpf_jit_supports_subprog_tailcalls(void)
 {
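
With bpf_jit_supports_arena() now returning true, programs like the following can be JITed on LoongArch instead of being rejected. This is a sketch modeled on the upstream arena selftests; the __arena address-space attribute and the map shape come from tools/testing/selftests/bpf (bpf_arena_common.h), not from this commit:

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Definition borrowed from the selftests; requires a clang with
 * __BPF_FEATURE_ADDR_SPACE_CAST support. */
#define __arena __attribute__((address_space(1)))

struct {
	__uint(type, BPF_MAP_TYPE_ARENA);
	__uint(map_flags, BPF_F_MMAPABLE);	/* arenas must be mmapable */
	__uint(max_entries, 1);			/* arena size in pages */
} arena SEC(".maps");

/* User space mmaps the arena and fills this in through the skeleton. */
int __arena *counter;

SEC("syscall")
int touch_arena(void *ctx)
{
	if (counter)
		(*counter)++;	/* compiled to arena accesses, JITed here
				 * as PROBE_MEM32 loads/stores */
	return 0;
}

char _license[] SEC("license") = "GPL";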