Merge branch 'bpf-allow-utf-8-literals-in-bpf_bprintf_prepare'

Yihan Ding says:

====================
bpf: allow UTF-8 literals in bpf_bprintf_prepare()

bpf_bprintf_prepare() currently rejects any non-ASCII byte in format
strings, so helpers such as bpf_trace_printk() fail to emit UTF-8
literal text even when those bytes are not part of a format specifier.

Keep plain text permissive while continuing to parse '%' sequences as
ASCII-only. Patch 1 updates test_snprintf_negative() at the same time so
the selftests stay consistent during bisection. Patch 2 then extends
trace_printk coverage for both the valid UTF-8 literal case and the
invalid non-ASCII-after-'%' case.

Changes in v3:
- drop Suggested-by trailers and move review credit into this changelog
- update test_snprintf_negative() in patch 1/2 so plain non-ASCII text is
  accepted while non-ASCII after '%' is still rejected, keeping
  ./test_progs -t snprintf aligned with the new behavior.
- clarify the trace_printk negative case with an explicit invalid format
  string and comment
- address Paul Chaignon's review feedback and keep the negative coverage
  requested earlier by Alan Maguire

Changes in v2:
- split the core change and selftest updates into two patches
- drop unnecessary isspace()/ispunct() casts
- add comments to clarify plain-text vs format-specifier handling
- add a negative selftest for non-ASCII bytes inside '%' sequences

Testing:
- Reproduced on x86_64 without the core fix: ASCII trace output works,
  while UTF-8 literal text in bpf_trace_printk() is rejected and
  produces no trace output
- Verified with tools/testing/selftests/bpf: ./test_progs -t trace_printk
- Verified with tools/testing/selftests/bpf: ./test_progs -t snprintf
====================

Link: https://patch.msgid.link/20260416120142.1420646-1-dingyihan@uniontech.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov
2026-04-16 15:53:32 -07:00
4 changed files with 50 additions and 8 deletions

View File

@@ -845,7 +845,13 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
data->buf = buffers->buf;
for (i = 0; i < fmt_size; i++) {
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
unsigned char c = fmt[i];
/*
* Permit bytes >= 0x80 in plain text so UTF-8 literals can pass
* through unchanged, while still rejecting ASCII control bytes.
*/
if (isascii(c) && !isprint(c) && !isspace(c)) {
err = -EINVAL;
goto out;
}
@@ -867,6 +873,15 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
* always access fmt[i + 1], in the worst case it will be a 0
*/
i++;
c = fmt[i];
/*
* The format parser below only understands ASCII conversion
* specifiers and modifiers, so reject non-ASCII after '%'.
*/
if (!isascii(c)) {
err = -EINVAL;
goto out;
}
/* skip optional "[0 +-][num]" width formatting field */
while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||

View File

@@ -114,7 +114,8 @@ static void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text");
ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");

View File

@@ -6,18 +6,21 @@
#include "trace_printk.lskel.h"
#define SEARCHMSG "testing,testing"
#define SEARCHMSG_UTF8 "中文,测试"

/*
 * Callback handed to read_trace_pipe_iter(): tallies which search strings
 * appear in the text it is given.
 *
 * @str:  text to scan.
 * @data: points to an array of two int counters; element 0 counts texts
 *        containing SEARCHMSG, element 1 counts texts containing
 *        SEARCHMSG_UTF8.
 *
 * Each counter is bumped at most once per call, and only when its own
 * search string is present.
 */
static void trace_pipe_cb(const char *str, void *data)
{
	int *found = data;

	if (strstr(str, SEARCHMSG))
		found[0]++;
	if (strstr(str, SEARCHMSG_UTF8))
		found[1]++;
}
void serial_test_trace_printk(void)
{
struct trace_printk_lskel__bss *bss;
struct trace_printk_lskel *skel;
int err = 0, found = 0;
int err = 0, found[2] = {};
skel = trace_printk_lskel__open();
if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
@@ -46,11 +49,24 @@ void serial_test_trace_printk(void)
if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret"))
goto cleanup;
/* verify our search string is in the trace buffer */
ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
if (!ASSERT_GT(bss->trace_printk_utf8_ran, 0, "bss->trace_printk_utf8_ran"))
goto cleanup;
if (!ASSERT_GT(bss->trace_printk_utf8_ret, 0, "bss->trace_printk_utf8_ret"))
goto cleanup;
if (!ASSERT_LT(bss->trace_printk_invalid_spec_ret, 0,
"bss->trace_printk_invalid_spec_ret"))
goto cleanup;
/* verify our search strings are in the trace buffer */
ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, found, 1000),
"read_trace_pipe_iter");
if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
if (!ASSERT_EQ(found[0], bss->trace_printk_ran, "found"))
goto cleanup;
if (!ASSERT_EQ(found[1], bss->trace_printk_utf8_ran, "found_utf8"))
goto cleanup;
cleanup:

View File

@@ -10,13 +10,23 @@ char _license[] SEC("license") = "GPL";
/* Result and invocation counter of the bpf_trace_printk() call using fmt. */
int trace_printk_ret = 0;
int trace_printk_ran = 0;
/* Result of the bpf_trace_printk() call using invalid_spec_fmt. */
int trace_printk_invalid_spec_ret = 0;
/* Result and invocation counter of the bpf_trace_printk() call using utf8_fmt. */
int trace_printk_utf8_ret = 0;
int trace_printk_utf8_ran = 0;
/* ASCII-only format; its %d receives the incremented trace_printk_ran. */
const char fmt[] = "Testing,testing %d\n";
/* Format with UTF-8 literal text before the %d conversion. */
static const char utf8_fmt[] = "中文,测试 %d\n";
/* Non-ASCII bytes after '%' must still be rejected. */
static const char invalid_spec_fmt[] = "%\x80\n";
/*
 * fentry program attached to the nanosleep syscall entry point.
 *
 * Issues three bpf_trace_printk() calls and records each return value in a
 * global so user space can inspect it:
 *   - fmt:              ASCII text with a %d argument
 *   - utf8_fmt:         UTF-8 literal text with a %d argument
 *   - invalid_spec_fmt: a non-ASCII byte directly after '%'
 *
 * The two *_ran counters are incremented before their call and the new
 * value is passed as the %d argument.
 */
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int sys_enter(void *ctx)
{
	/* ASCII format string. */
	trace_printk_ran += 1;
	trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), trace_printk_ran);

	/* Format string carrying UTF-8 literal text. */
	trace_printk_utf8_ran += 1;
	trace_printk_utf8_ret = bpf_trace_printk(utf8_fmt, sizeof(utf8_fmt),
						 trace_printk_utf8_ran);

	/* Format string with a non-ASCII byte after '%'; takes no arguments. */
	trace_printk_invalid_spec_ret =
		bpf_trace_printk(invalid_spec_fmt, sizeof(invalid_spec_fmt));

	return 0;
}