Files
linux/kernel/bpf/states.c
Alexei Starovoitov c82834a5a1 bpf: Move state equivalence logic to states.c
verifier.c is huge. Move is_state_visited() to states.c,
so that all state equivalence logic is in one file.

Mechanical move. No functional changes.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20260412152936.54262-5-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-04-12 12:36:52 -07:00

1564 lines
52 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
#define BPF_COMPLEXITY_LIMIT_STATES 64
static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
{
return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
}
static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
{
return env->insn_aux_data[insn_idx].is_iter_next;
}
static void update_peak_states(struct bpf_verifier_env *env)
{
u32 cur_states;
cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
env->peak_states = max(env->peak_states, cur_states);
}
/* struct bpf_verifier_state->parent refers to states
* that are in either of env->{expored_states,free_list}.
* In both cases the state is contained in struct bpf_verifier_state_list.
*/
static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
{
if (st->parent)
return container_of(st->parent, struct bpf_verifier_state_list, state);
return NULL;
}
static bool incomplete_read_marks(struct bpf_verifier_env *env,
struct bpf_verifier_state *st);
/* A state can be freed if it is no longer referenced:
* - is in the env->free_list;
* - has no children states;
*/
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
struct bpf_verifier_state_list *sl)
{
if (!sl->in_free_list
|| sl->state.branches != 0
|| incomplete_read_marks(env, &sl->state))
return;
list_del(&sl->node);
bpf_free_verifier_state(&sl->state, false);
kfree(sl);
env->free_list_size--;
}
/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
* if such frame exists form a corresponding @callchain as an array of
* call sites leading to this frame and SCC id.
* E.g.:
*
* void foo() { A: loop {... SCC#1 ...}; }
* void bar() { B: loop { C: foo(); ... SCC#2 ... }
* D: loop { E: foo(); ... SCC#3 ... } }
* void main() { F: bar(); }
*
* @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
* on @st frame call sites being (F,C,A) or (F,E,A).
*/
static bool compute_scc_callchain(struct bpf_verifier_env *env,
struct bpf_verifier_state *st,
struct bpf_scc_callchain *callchain)
{
u32 i, scc, insn_idx;
memset(callchain, 0, sizeof(*callchain));
for (i = 0; i <= st->curframe; i++) {
insn_idx = bpf_frame_insn_idx(st, i);
scc = env->insn_aux_data[insn_idx].scc;
if (scc) {
callchain->scc = scc;
break;
} else if (i < st->curframe) {
callchain->callsites[i] = insn_idx;
} else {
return false;
}
}
return true;
}
/* Check if bpf_scc_visit instance for @callchain exists. */
static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
struct bpf_scc_callchain *callchain)
{
struct bpf_scc_info *info = env->scc_info[callchain->scc];
struct bpf_scc_visit *visits = info->visits;
u32 i;
if (!info)
return NULL;
for (i = 0; i < info->num_visits; i++)
if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
return &visits[i];
return NULL;
}
/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
* Allocated instances are alive for a duration of the do_check_common()
* call and are freed by free_states().
*/
static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
struct bpf_scc_callchain *callchain)
{
struct bpf_scc_visit *visit;
struct bpf_scc_info *info;
u32 scc, num_visits;
u64 new_sz;
scc = callchain->scc;
info = env->scc_info[scc];
num_visits = info ? info->num_visits : 0;
new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
if (!info)
return NULL;
env->scc_info[scc] = info;
info->num_visits = num_visits + 1;
visit = &info->visits[num_visits];
memset(visit, 0, sizeof(*visit));
memcpy(&visit->callchain, callchain, sizeof(*callchain));
return visit;
}
/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
{
char *buf = env->tmp_str_buf;
int i, delta = 0;
delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
if (!callchain->callsites[i])
break;
delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
callchain->callsites[i]);
}
delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
return env->tmp_str_buf;
}
/* If callchain for @st exists (@st is in some SCC), ensure that
* bpf_scc_visit instance for this callchain exists.
* If instance does not exist or is empty, assign visit->entry_state to @st.
*/
static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
struct bpf_scc_callchain *callchain = &env->callchain_buf;
struct bpf_scc_visit *visit;
if (!compute_scc_callchain(env, st, callchain))
return 0;
visit = scc_visit_lookup(env, callchain);
visit = visit ?: scc_visit_alloc(env, callchain);
if (!visit)
return -ENOMEM;
if (!visit->entry_state) {
visit->entry_state = st;
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
}
return 0;
}
static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
/* If callchain for @st exists (@st is in some SCC), make it empty:
* - set visit->entry_state to NULL;
* - flush accumulated backedges.
*/
static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
struct bpf_scc_callchain *callchain = &env->callchain_buf;
struct bpf_scc_visit *visit;
if (!compute_scc_callchain(env, st, callchain))
return 0;
visit = scc_visit_lookup(env, callchain);
if (!visit) {
/*
* If path traversal stops inside an SCC, corresponding bpf_scc_visit
* must exist for non-speculative paths. For non-speculative paths
* traversal stops when:
* a. Verification error is found, maybe_exit_scc() is not called.
* b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
* of any SCC.
* c. A checkpoint is reached and matched. Checkpoints are created by
* is_state_visited(), which calls maybe_enter_scc(), which allocates
* bpf_scc_visit instances for checkpoints within SCCs.
* (c) is the only case that can reach this point.
*/
if (!st->speculative) {
verifier_bug(env, "scc exit: no visit info for call chain %s",
format_callchain(env, callchain));
return -EFAULT;
}
return 0;
}
if (visit->entry_state != st)
return 0;
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
visit->entry_state = NULL;
env->num_backedges -= visit->num_backedges;
visit->num_backedges = 0;
update_peak_states(env);
return propagate_backedges(env, visit);
}
/* Lookup an bpf_scc_visit instance corresponding to @st callchain
* and add @backedge to visit->backedges. @st callchain must exist.
*/
static int add_scc_backedge(struct bpf_verifier_env *env,
struct bpf_verifier_state *st,
struct bpf_scc_backedge *backedge)
{
struct bpf_scc_callchain *callchain = &env->callchain_buf;
struct bpf_scc_visit *visit;
if (!compute_scc_callchain(env, st, callchain)) {
verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
st->insn_idx);
return -EFAULT;
}
visit = scc_visit_lookup(env, callchain);
if (!visit) {
verifier_bug(env, "add backedge: no visit info for call chain %s",
format_callchain(env, callchain));
return -EFAULT;
}
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
backedge->next = visit->backedges;
visit->backedges = backedge;
visit->num_backedges++;
env->num_backedges++;
update_peak_states(env);
return 0;
}
/* bpf_reg_state->live marks for registers in a state @st are incomplete,
* if state @st is in some SCC and not all execution paths starting at this
* SCC are fully explored.
*/
static bool incomplete_read_marks(struct bpf_verifier_env *env,
struct bpf_verifier_state *st)
{
struct bpf_scc_callchain *callchain = &env->callchain_buf;
struct bpf_scc_visit *visit;
if (!compute_scc_callchain(env, st, callchain))
return false;
visit = scc_visit_lookup(env, callchain);
if (!visit)
return false;
return !!visit->backedges;
}
int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
struct bpf_verifier_state_list *sl = NULL, *parent_sl;
struct bpf_verifier_state *parent;
int err;
while (st) {
u32 br = --st->branches;
/* verifier_bug_if(br > 1, ...) technically makes sense here,
* but see comment in push_stack(), hence:
*/
verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
if (br)
break;
err = maybe_exit_scc(env, st);
if (err)
return err;
parent = st->parent;
parent_sl = state_parent_as_list(st);
if (sl)
maybe_free_verifier_state(env, sl);
st = parent;
sl = parent_sl;
}
return 0;
}
/* check %cur's range satisfies %old's */
static bool range_within(const struct bpf_reg_state *old,
const struct bpf_reg_state *cur)
{
return old->umin_value <= cur->umin_value &&
old->umax_value >= cur->umax_value &&
old->smin_value <= cur->smin_value &&
old->smax_value >= cur->smax_value &&
old->u32_min_value <= cur->u32_min_value &&
old->u32_max_value >= cur->u32_max_value &&
old->s32_min_value <= cur->s32_min_value &&
old->s32_max_value >= cur->s32_max_value;
}
/* If in the old state two registers had the same id, then they need to have
* the same id in the new state as well. But that id could be different from
* the old state, so we need to track the mapping from old to new ids.
* Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
* regs with old id 5 must also have new id 9 for the new state to be safe. But
* regs with a different old id could still have new id 9, we don't care about
* that.
* So we look through our idmap to see if this old id has been seen before. If
* so, we require the new id to match; otherwise, we add the id pair to the map.
*/
static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
struct bpf_id_pair *map = idmap->map;
unsigned int i;
/* either both IDs should be set or both should be zero */
if (!!old_id != !!cur_id)
return false;
if (old_id == 0) /* cur_id == 0 as well */
return true;
for (i = 0; i < idmap->cnt; i++) {
if (map[i].old == old_id)
return map[i].cur == cur_id;
if (map[i].cur == cur_id)
return false;
}
/* Reached the end of known mappings; haven't seen this id before */
if (idmap->cnt < BPF_ID_MAP_SIZE) {
map[idmap->cnt].old = old_id;
map[idmap->cnt].cur = cur_id;
idmap->cnt++;
return true;
}
/* We ran out of idmap slots, which should be impossible */
WARN_ON_ONCE(1);
return false;
}
/*
* Compare scalar register IDs for state equivalence.
*
* When old_id == 0, the old register is independent - not linked to any
* other register. Any linking in the current state only adds constraints,
* making it more restrictive. Since the old state didn't rely on any ID
* relationships for this register, it's always safe to accept cur regardless
* of its ID. Hence, return true immediately.
*
* When old_id != 0 but cur_id == 0, we need to ensure that different
* independent registers in cur don't incorrectly satisfy the ID matching
* requirements of linked registers in old.
*
* Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
* and r7.id=0 (both independent), without temp IDs both would map old_id=X
* to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
* X->temp2, but X is already mapped to temp1, so the check fails correctly.
*
* When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the
* base id (flag stripped) must both map consistently. Example: old has
* r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag
* (r3 derived from unrelated r4). Without the base check, idmap gets two
* independent entries A->B and A|flag->C|flag, missing that A->C conflicts
* with A->B. The base ID cross-check catches this.
*/
static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
if (!old_id)
return true;
cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
if (!check_ids(old_id, cur_id, idmap))
return false;
if (old_id & BPF_ADD_CONST) {
old_id &= ~BPF_ADD_CONST;
cur_id &= ~BPF_ADD_CONST;
if (!check_ids(old_id, cur_id, idmap))
return false;
}
return true;
}
static void __clean_func_state(struct bpf_verifier_env *env,
struct bpf_func_state *st,
u16 live_regs, int frame)
{
int i, j;
for (i = 0; i < BPF_REG_FP; i++) {
/* liveness must not touch this register anymore */
if (!(live_regs & BIT(i)))
/* since the register is unused, clear its state
* to make further comparison simpler
*/
bpf_mark_reg_not_init(env, &st->regs[i]);
}
/*
* Clean dead 4-byte halves within each SPI independently.
* half_spi 2*i → lower half: slot_type[0..3] (closer to FP)
* half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP)
*/
for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
bool lo_live = bpf_stack_slot_alive(env, frame, i * 2);
bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1);
if (!hi_live || !lo_live) {
int start = !lo_live ? 0 : BPF_REG_SIZE / 2;
int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2;
u8 stype = st->stack[i].slot_type[7];
/*
* Don't clear special slots.
* destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to
* detect overwrites and invalidate associated data slices.
* is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit()
* check for their respective slot types to detect double-create.
*/
if (stype == STACK_DYNPTR || stype == STACK_ITER ||
stype == STACK_IRQ_FLAG)
continue;
/*
* Only destroy spilled_ptr when hi half is dead.
* If hi half is still live with STACK_SPILL, the
* spilled_ptr metadata is needed for correct state
* comparison in stacksafe().
* is_spilled_reg() is using slot_type[7], but
* is_spilled_scalar_after() check either slot_type[0] or [4]
*/
if (!hi_live) {
struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
if (lo_live && stype == STACK_SPILL) {
u8 val = STACK_MISC;
/*
* 8 byte spill of scalar 0 where half slot is dead
* should become STACK_ZERO in lo 4 bytes.
*/
if (bpf_register_is_null(spill))
val = STACK_ZERO;
for (j = 0; j < 4; j++) {
u8 *t = &st->stack[i].slot_type[j];
if (*t == STACK_SPILL)
*t = val;
}
}
bpf_mark_reg_not_init(env, spill);
}
for (j = start; j < end; j++)
st->stack[i].slot_type[j] = STACK_POISON;
}
}
}
static int clean_verifier_state(struct bpf_verifier_env *env,
struct bpf_verifier_state *st)
{
int i, err;
err = bpf_live_stack_query_init(env, st);
if (err)
return err;
for (i = 0; i <= st->curframe; i++) {
u32 ip = bpf_frame_insn_idx(st, i);
u16 live_regs = env->insn_aux_data[ip].live_regs_before;
__clean_func_state(env, st->frame[i], live_regs, i);
}
return 0;
}
static bool regs_exact(const struct bpf_reg_state *rold,
const struct bpf_reg_state *rcur,
struct bpf_idmap *idmap)
{
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
check_ids(rold->id, rcur->id, idmap) &&
check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
}
enum exact_level {
NOT_EXACT,
EXACT,
RANGE_WITHIN
};
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
enum exact_level exact)
{
if (exact == EXACT)
return regs_exact(rold, rcur, idmap);
if (rold->type == NOT_INIT)
/* explored state can't have used this */
return true;
/* Enforce that register types have to match exactly, including their
* modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
* rule.
*
* One can make a point that using a pointer register as unbounded
* SCALAR would be technically acceptable, but this could lead to
* pointer leaks because scalars are allowed to leak while pointers
* are not. We could make this safe in special cases if root is
* calling us, but it's probably not worth the hassle.
*
* Also, register types that are *not* MAYBE_NULL could technically be
* safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
* is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
* to the same map).
* However, if the old MAYBE_NULL register then got NULL checked,
* doing so could have affected others with the same id, and we can't
* check for that because we lost the id when we converted to
* a non-MAYBE_NULL variant.
* So, as a general rule we don't allow mixing MAYBE_NULL and
* non-MAYBE_NULL registers as well.
*/
if (rold->type != rcur->type)
return false;
switch (base_type(rold->type)) {
case SCALAR_VALUE:
if (env->explore_alu_limits) {
/* explore_alu_limits disables tnum_in() and range_within()
* logic and requires everything to be strict
*/
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
check_scalar_ids(rold->id, rcur->id, idmap);
}
if (!rold->precise && exact == NOT_EXACT)
return true;
/*
* Linked register tracking uses rold->id to detect relationships.
* When rold->id == 0, the register is independent and any linking
* in rcur only adds constraints. When rold->id != 0, we must verify
* id mapping and (for BPF_ADD_CONST) offset consistency.
*
* +------------------+-----------+------------------+---------------+
* | | rold->id | rold + ADD_CONST | rold->id == 0 |
* |------------------+-----------+------------------+---------------|
* | rcur->id | range,ids | false | range |
* | rcur + ADD_CONST | false | range,ids,off | range |
* | rcur->id == 0 | range,ids | false | range |
* +------------------+-----------+------------------+---------------+
*
* Why check_ids() for scalar registers?
*
* Consider the following BPF code:
* 1: r6 = ... unbound scalar, ID=a ...
* 2: r7 = ... unbound scalar, ID=b ...
* 3: if (r6 > r7) goto +1
* 4: r6 = r7
* 5: if (r6 > X) goto ...
* 6: ... memory operation using r7 ...
*
* First verification path is [1-6]:
* - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
* - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
* r7 <= X, because r6 and r7 share same id.
* Next verification path is [1-4, 6].
*
* Instruction (6) would be reached in two states:
* I. r6{.id=b}, r7{.id=b} via path 1-6;
* II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
*
* Use check_ids() to distinguish these states.
* ---
* Also verify that new value satisfies old value range knowledge.
*/
/*
* ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
* BPF_ADD_CONST64 have different linking semantics in
* sync_linked_regs() (alu32 zero-extends, alu64 does not),
* so pruning across different flag types is unsafe.
*/
if (rold->id &&
(rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
return false;
/* Both have offset linkage: offsets must match */
if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
return false;
if (!check_scalar_ids(rold->id, rcur->id, idmap))
return false;
return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
case PTR_TO_MEM:
case PTR_TO_BUF:
case PTR_TO_TP_BUFFER:
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
*/
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off) &&
check_ids(rold->id, rcur->id, idmap) &&
check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
/* We must have at least as much range as the old ptr
* did, so that any accesses which were safe before are
* still safe. This is true even if old range < old off,
* since someone could have accessed through (ptr - k), or
* even done ptr -= k in a register, to get a safe access.
*/
if (rold->range < 0 || rcur->range < 0) {
/* special case for [BEYOND|AT]_PKT_END */
if (rold->range != rcur->range)
return false;
} else if (rold->range > rcur->range) {
return false;
}
/* id relations must be preserved */
if (!check_ids(rold->id, rcur->id, idmap))
return false;
/* new val must satisfy old val knowledge */
return range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_STACK:
/* two stack pointers are equal only if they're pointing to
* the same stack frame, since fp-8 in foo != fp-8 in bar
*/
return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
case PTR_TO_ARENA:
return true;
case PTR_TO_INSN:
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
default:
return regs_exact(rold, rcur, idmap);
}
}
static struct bpf_reg_state unbound_reg;
static __init int unbound_reg_init(void)
{
bpf_mark_reg_unknown_imprecise(&unbound_reg);
return 0;
}
late_initcall(unbound_reg_init);
static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
{
return stack->slot_type[im] == STACK_SPILL &&
stack->spilled_ptr.type == SCALAR_VALUE;
}
static bool is_stack_misc_after(struct bpf_verifier_env *env,
struct bpf_stack_state *stack, int im)
{
u32 i;
for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
if ((stack->slot_type[i] == STACK_MISC) ||
((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
env->allow_uninit_stack))
continue;
return false;
}
return true;
}
static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
struct bpf_stack_state *stack, int im)
{
if (is_spilled_scalar_after(stack, im))
return &stack->spilled_ptr;
if (is_stack_misc_after(env, stack, im))
return &unbound_reg;
return NULL;
}
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
struct bpf_func_state *cur, struct bpf_idmap *idmap,
enum exact_level exact)
{
int i, spi;
/* walk slots of the explored stack and ignore any additional
* slots in the current stack, since explored(safe) state
* didn't use them
*/
for (i = 0; i < old->allocated_stack; i++) {
struct bpf_reg_state *old_reg, *cur_reg;
int im = i % BPF_REG_SIZE;
spi = i / BPF_REG_SIZE;
if (exact == EXACT) {
u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE];
u8 cur_type = i < cur->allocated_stack ?
cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID;
/* STACK_INVALID and STACK_POISON are equivalent for pruning */
if (old_type == STACK_POISON)
old_type = STACK_INVALID;
if (cur_type == STACK_POISON)
cur_type = STACK_INVALID;
if (i >= cur->allocated_stack || old_type != cur_type)
return false;
}
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID ||
old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON)
continue;
if (env->allow_uninit_stack &&
old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
continue;
/* explored stack has more populated slots than current stack
* and these slots were used
*/
if (i >= cur->allocated_stack)
return false;
/*
* 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa.
* Load from MISC/INVALID slots produces unbound scalar.
* Construct a fake register for such stack and call
* regsafe() to ensure scalar ids are compared.
*/
if (im == 0 || im == 4) {
old_reg = scalar_reg_for_stack(env, &old->stack[spi], im);
cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im);
if (old_reg && cur_reg) {
if (!regsafe(env, old_reg, cur_reg, idmap, exact))
return false;
i += (im == 0 ? BPF_REG_SIZE - 1 : 3);
continue;
}
}
/* if old state was safe with misc data in the stack
* it will be safe with zero-initialized stack.
* The opposite is not true
*/
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
continue;
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
cur->stack[spi].slot_type[i % BPF_REG_SIZE])
/* Ex: old explored (safe) state has STACK_SPILL in
* this stack slot, but current has STACK_MISC ->
* this verifier states are not equivalent,
* return false to continue verification of this path
*/
return false;
if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
continue;
/* Both old and cur are having same slot_type */
switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
case STACK_SPILL:
/* when explored and current stack slot are both storing
* spilled registers, check that stored pointers types
* are the same as well.
* Ex: explored safe path could have stored
* (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
* but current path has stored:
* (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
if (!regsafe(env, &old->stack[spi].spilled_ptr,
&cur->stack[spi].spilled_ptr, idmap, exact))
return false;
break;
case STACK_DYNPTR:
old_reg = &old->stack[spi].spilled_ptr;
cur_reg = &cur->stack[spi].spilled_ptr;
if (old_reg->dynptr.type != cur_reg->dynptr.type ||
old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
return false;
break;
case STACK_ITER:
old_reg = &old->stack[spi].spilled_ptr;
cur_reg = &cur->stack[spi].spilled_ptr;
/* iter.depth is not compared between states as it
* doesn't matter for correctness and would otherwise
* prevent convergence; we maintain it only to prevent
* infinite loop check triggering, see
* iter_active_depths_differ()
*/
if (old_reg->iter.btf != cur_reg->iter.btf ||
old_reg->iter.btf_id != cur_reg->iter.btf_id ||
old_reg->iter.state != cur_reg->iter.state ||
/* ignore {old_reg,cur_reg}->iter.depth, see above */
!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
return false;
break;
case STACK_IRQ_FLAG:
old_reg = &old->stack[spi].spilled_ptr;
cur_reg = &cur->stack[spi].spilled_ptr;
if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
return false;
break;
case STACK_MISC:
case STACK_ZERO:
case STACK_INVALID:
case STACK_POISON:
continue;
/* Ensure that new unhandled slot types return false by default */
default:
return false;
}
}
return true;
}
static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
struct bpf_idmap *idmap)
{
int i;
if (old->acquired_refs != cur->acquired_refs)
return false;
if (old->active_locks != cur->active_locks)
return false;
if (old->active_preempt_locks != cur->active_preempt_locks)
return false;
if (old->active_rcu_locks != cur->active_rcu_locks)
return false;
if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
return false;
if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
old->active_lock_ptr != cur->active_lock_ptr)
return false;
for (i = 0; i < old->acquired_refs; i++) {
if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
old->refs[i].type != cur->refs[i].type)
return false;
switch (old->refs[i].type) {
case REF_TYPE_PTR:
case REF_TYPE_IRQ:
break;
case REF_TYPE_LOCK:
case REF_TYPE_RES_LOCK:
case REF_TYPE_RES_LOCK_IRQ:
if (old->refs[i].ptr != cur->refs[i].ptr)
return false;
break;
default:
WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
return false;
}
}
return true;
}
/* compare two verifier states
*
* all states stored in state_list are known to be valid, since
* verifier reached 'bpf_exit' instruction through them
*
* this function is called when verifier exploring different branches of
* execution popped from the state stack. If it sees an old state that has
* more strict register state and more strict stack state then this execution
* branch doesn't need to be explored further, since verifier already
* concluded that more strict state leads to valid finish.
*
* Therefore two states are equivalent if register state is more conservative
* and explored stack state is more conservative than the current one.
* Example:
* explored current
* (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
* (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
*
* In other words if current stack state (one being explored) has more
* valid slots than old one that already passed validation, it means
* the verifier can stop exploring and conclude that current state is valid too
*
* Similarly with registers. If explored state has register type as invalid
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
{
u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
u16 i;
if (old->callback_depth > cur->callback_depth)
return false;
for (i = 0; i < MAX_BPF_REG; i++)
if (((1 << i) & live_regs) &&
!regsafe(env, &old->regs[i], &cur->regs[i],
&env->idmap_scratch, exact))
return false;
if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
return false;
return true;
}
static void reset_idmap_scratch(struct bpf_verifier_env *env)
{
struct bpf_idmap *idmap = &env->idmap_scratch;
idmap->tmp_id_gen = env->id_gen;
idmap->cnt = 0;
}
static bool states_equal(struct bpf_verifier_env *env,
struct bpf_verifier_state *old,
struct bpf_verifier_state *cur,
enum exact_level exact)
{
u32 insn_idx;
int i;
if (old->curframe != cur->curframe)
return false;
reset_idmap_scratch(env);
/* Verification state from speculative execution simulation
* must never prune a non-speculative execution one.
*/
if (old->speculative && !cur->speculative)
return false;
if (old->in_sleepable != cur->in_sleepable)
return false;
if (!refsafe(old, cur, &env->idmap_scratch))
return false;
/* for states to be equal callsites have to be the same
* and all frame states need to be equivalent
*/
for (i = 0; i <= old->curframe; i++) {
insn_idx = bpf_frame_insn_idx(old, i);
if (old->frame[i]->callsite != cur->frame[i]->callsite)
return false;
if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
return false;
}
return true;
}
/* find precise scalars in the previous equivalent state and
* propagate them into the current state
*/
static int propagate_precision(struct bpf_verifier_env *env,
const struct bpf_verifier_state *old,
struct bpf_verifier_state *cur,
bool *changed)
{
struct bpf_reg_state *state_reg;
struct bpf_func_state *state;
int i, err = 0, fr;
bool first;
for (fr = old->curframe; fr >= 0; fr--) {
state = old->frame[fr];
state_reg = state->regs;
first = true;
for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
if (state_reg->type != SCALAR_VALUE ||
!state_reg->precise)
continue;
if (env->log.level & BPF_LOG_LEVEL2) {
if (first)
verbose(env, "frame %d: propagating r%d", fr, i);
else
verbose(env, ",r%d", i);
}
bpf_bt_set_frame_reg(&env->bt, fr, i);
first = false;
}
for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
if (!bpf_is_spilled_reg(&state->stack[i]))
continue;
state_reg = &state->stack[i].spilled_ptr;
if (state_reg->type != SCALAR_VALUE ||
!state_reg->precise)
continue;
if (env->log.level & BPF_LOG_LEVEL2) {
if (first)
verbose(env, "frame %d: propagating fp%d",
fr, (-i - 1) * BPF_REG_SIZE);
else
verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
}
bpf_bt_set_frame_slot(&env->bt, fr, i);
first = false;
}
if (!first && (env->log.level & BPF_LOG_LEVEL2))
verbose(env, "\n");
}
err = bpf_mark_chain_precision(env, cur, -1, changed);
if (err < 0)
return err;
return 0;
}
#define MAX_BACKEDGE_ITERS 64
/* Propagate read and precision marks from visit->backedges[*].state->equal_state
* to corresponding parent states of visit->backedges[*].state until fixed point is reached,
* then free visit->backedges.
* After execution of this function incomplete_read_marks() will return false
* for all states corresponding to @visit->callchain.
*/
static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
{
struct bpf_scc_backedge *backedge;
struct bpf_verifier_state *st;
bool changed;
int i, err;
i = 0;
do {
if (i++ > MAX_BACKEDGE_ITERS) {
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "%s: too many iterations\n", __func__);
for (backedge = visit->backedges; backedge; backedge = backedge->next)
bpf_mark_all_scalars_precise(env, &backedge->state);
break;
}
changed = false;
for (backedge = visit->backedges; backedge; backedge = backedge->next) {
st = &backedge->state;
err = propagate_precision(env, st->equal_state, st, &changed);
if (err)
return err;
}
} while (changed);
bpf_free_backedges(visit);
return 0;
}
static bool states_maybe_looping(struct bpf_verifier_state *old,
struct bpf_verifier_state *cur)
{
struct bpf_func_state *fold, *fcur;
int i, fr = cur->curframe;
if (old->curframe != fr)
return false;
fold = old->frame[fr];
fcur = cur->frame[fr];
for (i = 0; i < MAX_BPF_REG; i++)
if (memcmp(&fold->regs[i], &fcur->regs[i],
offsetof(struct bpf_reg_state, frameno)))
return false;
return true;
}
/* is_state_visited() handles iter_next() (see process_iter_next_call() for
* terminology) calls specially: as opposed to bounded BPF loops, it *expects*
* states to match, which otherwise would look like an infinite loop. So while
* iter_next() calls are taken care of, we still need to be careful and
* prevent erroneous and too eager declaration of "infinite loop", when
* iterators are involved.
*
* Here's a situation in pseudo-BPF assembly form:
*
* 0: again: ; set up iter_next() call args
* 1: r1 = &it ; <CHECKPOINT HERE>
* 2: call bpf_iter_num_next ; this is iter_next() call
* 3: if r0 == 0 goto done
* 4: ... something useful here ...
* 5: goto again ; another iteration
* 6: done:
* 7: r1 = &it
* 8: call bpf_iter_num_destroy ; clean up iter state
* 9: exit
*
* This is a typical loop. Let's assume that we have a prune point at 1:,
* before we get to `call bpf_iter_num_next` (e.g., because of that `goto
* again`, assuming other heuristics don't get in a way).
*
* When we first time come to 1:, let's say we have some state X. We proceed
* to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
* Now we come back to validate that forked ACTIVE state. We proceed through
* 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
* are converging. But the problem is that we don't know that yet, as this
* convergence has to happen at iter_next() call site only. So if nothing is
* done, at 1: verifier will use bounded loop logic and declare infinite
* looping (and would be *technically* correct, if not for iterator's
* "eventual sticky NULL" contract, see process_iter_next_call()). But we
* don't want that. So what we do in process_iter_next_call() when we go on
* another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
* a different iteration. So when we suspect an infinite loop, we additionally
* check if any of the *ACTIVE* iterator states depths differ. If yes, we
* pretend we are not looping and wait for next iter_next() call.
*
* This only applies to ACTIVE state. In DRAINED state we don't expect to
* loop, because that would actually mean infinite loop, as DRAINED state is
* "sticky", and so we'll keep returning into the same instruction with the
* same state (at least in one of possible code paths).
*
* This approach allows to keep infinite loop heuristic even in the face of
* active iterator. E.g., C snippet below is and will be detected as
* infinitely looping:
*
* struct bpf_iter_num it;
* int *p, x;
*
* bpf_iter_num_new(&it, 0, 10);
* while ((p = bpf_iter_num_next(&t))) {
* x = p;
* while (x--) {} // <<-- infinite loop here
* }
*
*/
static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
{
struct bpf_reg_state *slot, *cur_slot;
struct bpf_func_state *state;
int i, fr;
for (fr = old->curframe; fr >= 0; fr--) {
state = old->frame[fr];
for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
if (state->stack[i].slot_type[0] != STACK_ITER)
continue;
slot = &state->stack[i].spilled_ptr;
if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
continue;
cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
if (cur_slot->iter.depth != slot->iter.depth)
return true;
}
}
return false;
}
static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
struct bpf_func_state *func;
struct bpf_reg_state *reg;
int i, j;
for (i = 0; i <= st->curframe; i++) {
func = st->frame[i];
for (j = 0; j < BPF_REG_FP; j++) {
reg = &func->regs[j];
if (reg->type != SCALAR_VALUE)
continue;
reg->precise = false;
}
for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
if (!bpf_is_spilled_reg(&func->stack[j]))
continue;
reg = &func->stack[j].spilled_ptr;
if (reg->type != SCALAR_VALUE)
continue;
reg->precise = false;
}
}
}
int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *cur = env->cur_state, *new;
bool force_new_state, add_new_state, loop;
int n, err, states_cnt = 0;
struct list_head *pos, *tmp, *head;
force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
/* Avoid accumulating infinitely long jmp history */
cur->jmp_history_cnt > 40;
/* bpf progs typically have pruning point every 4 instructions
* http://vger.kernel.org/bpfconf2019.html#session-1
* Do not add new state for future pruning if the verifier hasn't seen
* at least 2 jumps and at least 8 instructions.
* This heuristics helps decrease 'total_states' and 'peak_states' metric.
* In tests that amounts to up to 50% reduction into total verifier
* memory consumption and 20% verifier time speedup.
*/
add_new_state = force_new_state;
if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
env->insn_processed - env->prev_insn_processed >= 8)
add_new_state = true;
/* keep cleaning the current state as registers/stack become dead */
err = clean_verifier_state(env, cur);
if (err)
return err;
loop = false;
head = bpf_explored_state(env, insn_idx);
list_for_each_safe(pos, tmp, head) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
states_cnt++;
if (sl->state.insn_idx != insn_idx)
continue;
if (sl->state.branches) {
struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
if (frame->in_async_callback_fn &&
frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
/* Different async_entry_cnt means that the verifier is
* processing another entry into async callback.
* Seeing the same state is not an indication of infinite
* loop or infinite recursion.
* But finding the same state doesn't mean that it's safe
* to stop processing the current state. The previous state
* hasn't yet reached bpf_exit, since state.branches > 0.
* Checking in_async_callback_fn alone is not enough either.
* Since the verifier still needs to catch infinite loops
* inside async callbacks.
*/
goto skip_inf_loop_check;
}
/* BPF open-coded iterators loop detection is special.
* states_maybe_looping() logic is too simplistic in detecting
* states that *might* be equivalent, because it doesn't know
* about ID remapping, so don't even perform it.
* See process_iter_next_call() and iter_active_depths_differ()
* for overview of the logic. When current and one of parent
* states are detected as equivalent, it's a good thing: we prove
* convergence and can stop simulating further iterations.
* It's safe to assume that iterator loop will finish, taking into
* account iter_next() contract of eventually returning
* sticky NULL result.
*
* Note, that states have to be compared exactly in this case because
* read and precision marks might not be finalized inside the loop.
* E.g. as in the program below:
*
* 1. r7 = -16
* 2. r6 = bpf_get_prandom_u32()
* 3. while (bpf_iter_num_next(&fp[-8])) {
* 4. if (r6 != 42) {
* 5. r7 = -32
* 6. r6 = bpf_get_prandom_u32()
* 7. continue
* 8. }
* 9. r0 = r10
* 10. r0 += r7
* 11. r8 = *(u64 *)(r0 + 0)
* 12. r6 = bpf_get_prandom_u32()
* 13. }
*
* Here verifier would first visit path 1-3, create a checkpoint at 3
* with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
* not have read or precision mark for r7 yet, thus inexact states
* comparison would discard current state with r7=-32
* => unsafe memory access at 11 would not be caught.
*/
if (is_iter_next_insn(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
struct bpf_func_state *cur_frame;
struct bpf_reg_state *iter_state, *iter_reg;
int spi;
cur_frame = cur->frame[cur->curframe];
/* btf_check_iter_kfuncs() enforces that
* iter state pointer is always the first arg
*/
iter_reg = &cur_frame->regs[BPF_REG_1];
/* current state is valid due to states_equal(),
* so we can assume valid iter and reg state,
* no need for extra (re-)validations
*/
spi = bpf_get_spi(iter_reg->var_off.value);
iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr;
if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
loop = true;
goto hit;
}
}
goto skip_inf_loop_check;
}
if (is_may_goto_insn_at(env, insn_idx)) {
if (sl->state.may_goto_depth != cur->may_goto_depth &&
states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
loop = true;
goto hit;
}
}
if (bpf_calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
loop = true;
goto hit;
}
goto skip_inf_loop_check;
}
/* attempt to detect infinite loop to avoid unnecessary doomed work */
if (states_maybe_looping(&sl->state, cur) &&
states_equal(env, &sl->state, cur, EXACT) &&
!iter_active_depths_differ(&sl->state, cur) &&
sl->state.may_goto_depth == cur->may_goto_depth &&
sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
verbose_linfo(env, insn_idx, "; ");
verbose(env, "infinite loop detected at insn %d\n", insn_idx);
verbose(env, "cur state:");
print_verifier_state(env, cur, cur->curframe, true);
verbose(env, "old state:");
print_verifier_state(env, &sl->state, cur->curframe, true);
return -EINVAL;
}
/* if the verifier is processing a loop, avoid adding new state
* too often, since different loop iterations have distinct
* states and may not help future pruning.
* This threshold shouldn't be too low to make sure that
* a loop with large bound will be rejected quickly.
* The most abusive loop will be:
* r1 += 1
* if r1 < 1000000 goto pc-2
* 1M insn_procssed limit / 100 == 10k peak states.
* This threshold shouldn't be too high either, since states
* at the end of the loop are likely to be useful in pruning.
*/
skip_inf_loop_check:
if (!force_new_state &&
env->jmps_processed - env->prev_jmps_processed < 20 &&
env->insn_processed - env->prev_insn_processed < 100)
add_new_state = false;
goto miss;
}
/* See comments for mark_all_regs_read_and_precise() */
loop = incomplete_read_marks(env, &sl->state);
if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
sl->hit_cnt++;
/* if previous state reached the exit with precision and
* current state is equivalent to it (except precision marks)
* the precision needs to be propagated back in
* the current state.
*/
err = 0;
if (bpf_is_jmp_point(env, env->insn_idx))
err = bpf_push_jmp_history(env, cur, 0, 0);
err = err ? : propagate_precision(env, &sl->state, cur, NULL);
if (err)
return err;
/* When processing iterator based loops above propagate_liveness and
* propagate_precision calls are not sufficient to transfer all relevant
* read and precision marks. E.g. consider the following case:
*
* .-> A --. Assume the states are visited in the order A, B, C.
* | | | Assume that state B reaches a state equivalent to state A.
* | v v At this point, state C is not processed yet, so state A
* '-- B C has not received any read or precision marks from C.
* Thus, marks propagated from A to B are incomplete.
*
* The verifier mitigates this by performing the following steps:
*
* - Prior to the main verification pass, strongly connected components
* (SCCs) are computed over the program's control flow graph,
* intraprocedurally.
*
* - During the main verification pass, `maybe_enter_scc()` checks
* whether the current verifier state is entering an SCC. If so, an
* instance of a `bpf_scc_visit` object is created, and the state
* entering the SCC is recorded as the entry state.
*
* - This instance is associated not with the SCC itself, but with a
* `bpf_scc_callchain`: a tuple consisting of the call sites leading to
* the SCC and the SCC id. See `compute_scc_callchain()`.
*
* - When a verification path encounters a `states_equal(...,
* RANGE_WITHIN)` condition, there exists a call chain describing the
* current state and a corresponding `bpf_scc_visit` instance. A copy
* of the current state is created and added to
* `bpf_scc_visit->backedges`.
*
* - When a verification path terminates, `maybe_exit_scc()` is called
* from `bpf_update_branch_counts()`. For states with `branches == 0`, it
* checks whether the state is the entry state of any `bpf_scc_visit`
* instance. If it is, this indicates that all paths originating from
* this SCC visit have been explored. `propagate_backedges()` is then
* called, which propagates read and precision marks through the
* backedges until a fixed point is reached.
* (In the earlier example, this would propagate marks from A to B,
* from C to A, and then again from A to B.)
*
* A note on callchains
* --------------------
*
* Consider the following example:
*
* void foo() { loop { ... SCC#1 ... } }
* void main() {
* A: foo();
* B: ...
* C: foo();
* }
*
* Here, there are two distinct callchains leading to SCC#1:
* - (A, SCC#1)
* - (C, SCC#1)
*
* Each callchain identifies a separate `bpf_scc_visit` instance that
* accumulates backedge states. The `propagate_{liveness,precision}()`
* functions traverse the parent state of each backedge state, which
* means these parent states must remain valid (i.e., not freed) while
* the corresponding `bpf_scc_visit` instance exists.
*
* Associating `bpf_scc_visit` instances directly with SCCs instead of
* callchains would break this invariant:
* - States explored during `C: foo()` would contribute backedges to
* SCC#1, but SCC#1 would only be exited once the exploration of
* `A: foo()` completes.
* - By that time, the states explored between `A: foo()` and `C: foo()`
* (i.e., `B: ...`) may have already been freed, causing the parent
* links for states from `C: foo()` to become invalid.
*/
if (loop) {
struct bpf_scc_backedge *backedge;
backedge = kzalloc_obj(*backedge,
GFP_KERNEL_ACCOUNT);
if (!backedge)
return -ENOMEM;
err = bpf_copy_verifier_state(&backedge->state, cur);
backedge->state.equal_state = &sl->state;
backedge->state.insn_idx = insn_idx;
err = err ?: add_scc_backedge(env, &sl->state, backedge);
if (err) {
bpf_free_verifier_state(&backedge->state, false);
kfree(backedge);
return err;
}
}
return 1;
}
miss:
/* when new state is not going to be added do not increase miss count.
* Otherwise several loop iterations will remove the state
* recorded earlier. The goal of these heuristics is to have
* states from some iterations of the loop (some in the beginning
* and some at the end) to help pruning.
*/
if (add_new_state)
sl->miss_cnt++;
/* heuristic to determine whether this state is beneficial
* to keep checking from state equivalence point of view.
* Higher numbers increase max_states_per_insn and verification time,
* but do not meaningfully decrease insn_processed.
* 'n' controls how many times state could miss before eviction.
* Use bigger 'n' for checkpoints because evicting checkpoint states
* too early would hinder iterator convergence.
*/
n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
if (sl->miss_cnt > sl->hit_cnt * n + n) {
/* the state is unlikely to be useful. Remove it to
* speed up verification
*/
sl->in_free_list = true;
list_del(&sl->node);
list_add(&sl->node, &env->free_list);
env->free_list_size++;
env->explored_states_size--;
maybe_free_verifier_state(env, sl);
}
}
if (env->max_states_per_insn < states_cnt)
env->max_states_per_insn = states_cnt;
if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
return 0;
if (!add_new_state)
return 0;
/* There were no equivalent states, remember the current one.
* Technically the current state is not proven to be safe yet,
* but it will either reach outer most bpf_exit (which means it's safe)
* or it will be rejected. When there are no loops the verifier won't be
* seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
* again on the way to bpf_exit.
* When looping the sl->state.branches will be > 0 and this state
* will not be considered for equivalence until branches == 0.
*/
new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
if (!new_sl)
return -ENOMEM;
env->total_states++;
env->explored_states_size++;
update_peak_states(env);
env->prev_jmps_processed = env->jmps_processed;
env->prev_insn_processed = env->insn_processed;
/* forget precise markings we inherited, see __mark_chain_precision */
if (env->bpf_capable)
mark_all_scalars_imprecise(env, cur);
bpf_clear_singular_ids(env, cur);
/* add new state to the head of linked list */
new = &new_sl->state;
err = bpf_copy_verifier_state(new, cur);
if (err) {
bpf_free_verifier_state(new, false);
kfree(new_sl);
return err;
}
new->insn_idx = insn_idx;
verifier_bug_if(new->branches != 1, env,
"%s:branches_to_explore=%d insn %d",
__func__, new->branches, insn_idx);
err = maybe_enter_scc(env, new);
if (err) {
bpf_free_verifier_state(new, false);
kfree(new_sl);
return err;
}
cur->parent = new;
cur->first_insn_idx = insn_idx;
cur->dfs_depth = new->dfs_depth + 1;
bpf_clear_jmp_history(cur);
list_add(&new_sl->node, head);
return 0;
}