codel: annotate data-races in codel_dump_stats()

codel_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_codel_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function                                     old     new   delta
codel_qdisc_dequeue                         2462    2465      +3
codel_dump_stats                             250     249      -1
Total: Before=29739919, After=29739921, chg +0.00%

Fixes: 76e3cc126b ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260407143053.1570620-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet
2026-04-07 14:30:53 +00:00
committed by Jakub Kicinski
parent dbc2bb4e87
commit ea25e03da7
2 changed files with 35 additions and 32 deletions

View File

@@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
}
skb_len = skb_len_func(skb);
vars->ldelay = now - skb_time_func(skb);
WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
if (unlikely(skb_len > stats->maxpacket))
stats->maxpacket = skb_len;
WRITE_ONCE(stats->maxpacket, skb_len);
if (codel_time_before(vars->ldelay, params->target) ||
*backlog <= params->mtu) {
@@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (!skb) {
vars->first_above_time = 0;
vars->dropping = false;
WRITE_ONCE(vars->dropping, false);
return skb;
}
now = codel_get_time();
@@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (vars->dropping) {
if (!drop) {
/* sojourn time below target - leave dropping state */
vars->dropping = false;
WRITE_ONCE(vars->dropping, false);
} else if (codel_time_after_eq(now, vars->drop_next)) {
/* It's time for the next drop. Drop the current
* packet and dequeue the next. The dequeue might
@@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
*/
while (vars->dropping &&
codel_time_after_eq(now, vars->drop_next)) {
vars->count++; /* dont care of possible wrap
* since there is no more divide
*/
/* dont care of possible wrap
* since there is no more divide.
*/
WRITE_ONCE(vars->count, vars->count + 1);
codel_Newton_step(vars);
if (params->ecn && INET_ECN_set_ce(skb)) {
stats->ecn_mark++;
vars->drop_next =
WRITE_ONCE(stats->ecn_mark,
stats->ecn_mark + 1);
WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next,
params->interval,
vars->rec_inv_sqrt);
vars->rec_inv_sqrt));
goto end;
}
stats->drop_len += skb_len_func(skb);
@@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
skb_time_func,
backlog, now)) {
/* leave dropping state */
vars->dropping = false;
WRITE_ONCE(vars->dropping, false);
} else {
/* and schedule the next drop */
vars->drop_next =
WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next,
params->interval,
vars->rec_inv_sqrt);
vars->rec_inv_sqrt));
}
}
}
@@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
u32 delta;
if (params->ecn && INET_ECN_set_ce(skb)) {
stats->ecn_mark++;
WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
} else {
stats->drop_len += skb_len_func(skb);
drop_func(skb, ctx);
@@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
stats, skb_len_func,
skb_time_func, backlog, now);
}
vars->dropping = true;
WRITE_ONCE(vars->dropping, true);
/* if min went above target close to when we last went below it
* assume that the drop rate that controlled the queue on the
* last cycle is a good starting point to control it now.
@@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (delta > 1 &&
codel_time_before(now - vars->drop_next,
16 * params->interval)) {
vars->count = delta;
WRITE_ONCE(vars->count, delta);
/* we dont care if rec_inv_sqrt approximation
* is not very precise :
* Next Newton steps will correct it quadratically.
*/
codel_Newton_step(vars);
} else {
vars->count = 1;
WRITE_ONCE(vars->count, 1);
vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
}
vars->lastcount = vars->count;
vars->drop_next = codel_control_law(now, params->interval,
vars->rec_inv_sqrt);
WRITE_ONCE(vars->lastcount, vars->count);
WRITE_ONCE(vars->drop_next,
codel_control_law(now, params->interval,
vars->rec_inv_sqrt));
}
end:
if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
@@ -262,7 +265,7 @@ end:
params->ce_threshold_selector));
}
if (set_ce && INET_ECN_set_ce(skb))
stats->ce_mark++;
WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
}
return skb;
}